Example #1
def launch_eval(black_num=0, white_num=0):
    if black_num <= 0 or white_num <= 0:
        print("Need real model numbers")
        return

    b = fsdb.get_model(black_num)
    w = fsdb.get_model(white_num)

    b_model_path = os.path.join(fsdb.models_dir(), b)
    w_model_path = os.path.join(fsdb.models_dir(), w)

    kubernetes.config.load_kube_config()
    configuration = kubernetes.client.Configuration()
    api_instance = kubernetes.client.BatchV1Api(
        kubernetes.client.ApiClient(configuration))

    raw_job_conf = open("cluster/evaluator/gpu-evaluator.yaml").read()
    env_job_conf = os.path.expandvars(raw_job_conf)

    t = jinja2.Template(env_job_conf)
    job_conf = yaml.safe_load(t.render({'white': w_model_path,
                                        'black': b_model_path,
                                        'wnum': white_num,
                                        'bnum': black_num}))

    resp = api_instance.create_namespaced_job('default', body=job_conf)

    job_conf = yaml.safe_load(t.render({'white': b_model_path,
                                        'black': w_model_path,
                                        'wnum': black_num,
                                        'bnum': white_num}))

    resp = api_instance.create_namespaced_job('default', body=job_conf)
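For reference, a minimal, self-contained sketch of the render-then-load pattern used above. The template string here is only a stand-in for the templated fields of cluster/evaluator/gpu-evaluator.yaml, which are not shown in this example.

import jinja2
import yaml

# Hypothetical template using the same placeholders as the job config above.
template_text = """
metadata:
  name: evaluator-{{ bnum }}-{{ wnum }}
spec:
  blackModel: {{ black }}
  whiteModel: {{ white }}
"""

t = jinja2.Template(template_text)
job_conf = yaml.safe_load(t.render({'black': '/models/000100-somename',
                                    'white': '/models/000200-othername',
                                    'bnum': 100,
                                    'wnum': 200}))
print(job_conf['metadata']['name'])  # evaluator-100-200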
Example #2
def same_run_eval(black_num=0, white_num=0):
    """Shorthand to spawn a job matching up two models from the same run,
    identified by their model number """
    if black_num <= 0 or white_num <= 0:
        print("Need real model numbers")
        return

    b = fsdb.get_model(black_num)
    w = fsdb.get_model(white_num)
    bucket = fsdb.eval_dir()

    b_model_path = os.path.join(fsdb.models_dir(), b)
    w_model_path = os.path.join(fsdb.models_dir(), w)

    launch_eval_job(b_model_path + ".pb", w_model_path + ".pb",
                    "{:d}-{:d}".format(black_num, white_num), bucket)
Example #3
def train(working_dir):
    model_num, model_name = fsdb.get_latest_model()

    games = gfile.Glob(os.path.join(fsdb.selfplay_dir(), model_name, '*.zz'))
    if len(games) < MIN_GAMES_PER_GENERATION:
        print("{} doesn't have enough games to train a new model yet ({})".
              format(model_name, len(games)))
        print("Sleeping...")
        time.sleep(10 * 60)
        print("Done...")
        sys.exit(1)

    print("Training on gathered game data, initializing from {}".format(
        model_name))
    new_model_num = model_num + 1
    new_model_name = shipname.generate(new_model_num)
    print("New model will be {}".format(new_model_name))
    training_file = os.path.join(fsdb.golden_chunk_dir(),
                                 str(new_model_num) + '.tfrecord.zz')
    while not gfile.Exists(training_file):
        print("Waiting for", training_file)
        time.sleep(1 * 60)
    print("Using Golden File:", training_file)

    save_file = os.path.join(fsdb.models_dir(), new_model_name)
    try:
        main.train(working_dir, [training_file],
                   save_file,
                   generation_num=model_num + 1)
    except Exception:
        logging.exception("Train error")
Example #4
def eval_pv(eval_positions):
    model_paths = oneoff_utils.get_model_paths(fsdb.models_dir())

    idx_start = FLAGS.idx_start
    eval_every = FLAGS.eval_every

    print("Evaluating models {}-{}, eval_every={}".format(
          idx_start, len(model_paths), eval_every))
    for idx in tqdm(range(idx_start, len(model_paths), eval_every)):
        if idx == idx_start:
            player = oneoff_utils.load_player(model_paths[idx])
        else:
            oneoff_utils.restore_params(model_paths[idx], player)

        mcts = strategies.MCTSPlayer(
            player.network,
            resign_threshold=-1)

        for name, position in eval_positions:
            mcts.initialize_game(position)
            mcts.suggest_move(position)

            path = []
            node = mcts.root
            while node.children:
                node = node.children.get(node.best_child())
                path.append("{},{}".format(node.fmove, int(node.N)))

            save_file = os.path.join(
                FLAGS.data_dir, "pv-{}-{}".format(name, idx))
            with open(save_file, "w") as data:
                data.write("{},  {}\n".format(idx, ",".join(path)))
Example #5
def validate(working_dir, model_num=None, validate_name=None):
    """ Runs validate on the directories up to the most recent model, or up to
    (but not including) the model specified by `model_num`
    """
    if model_num is None:
        model_num, model_name = fsdb.get_latest_model()
    else:
        model_num = int(model_num)
        model_name = fsdb.get_model(model_num)

    # Model N was trained on games up through model N-2, so the validation set
    # should only be for models through N-2 as well, thus the (model_num - 1)
    # term.
    models = list(
        filter(lambda num_name: num_name[0] < (model_num - 1),
               fsdb.get_models()))
    # Run on the most recent 50 generations,
    # TODO(brianklee): make this hyperparameter dependency explicit/not hardcoded
    holdout_dirs = [
        os.path.join(fsdb.holdout_dir(), pair[1]) for pair in models[-50:]
    ]

    main.validate(working_dir,
                  *holdout_dirs,
                  checkpoint_name=os.path.join(fsdb.models_dir(), model_name),
                  validate_name=validate_name)
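A tiny worked example of the window filter above, using hypothetical (num, name) pairs in the shape returned by fsdb.get_models():

models = [(7, '000007-alpha'), (8, '000008-bravo'),
          (9, '000009-charlie'), (10, '000010-delta')]
model_num = 10

# Keep models strictly below model_num - 1, i.e. through N-2 (here, model 8).
validation_models = [pair for pair in models if pair[0] < model_num - 1]
print(validation_models)  # [(7, '000007-alpha'), (8, '000008-bravo')]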
Example #6
File: rl_loop.py  Project: wtdeng/minigo
def train(working_dir):
    model_num, model_name = fsdb.get_latest_model()

    print("Training on gathered game data, initializing from {}".format(model_name))
    new_model_num = model_num + 1
    new_model_name = shipname.generate(new_model_num)
    print("New model will be {}".format(new_model_name))
    training_file = os.path.join(
        fsdb.golden_chunk_dir(), str(new_model_num) + '.tfrecord.zz')
    while not gfile.Exists(training_file):
        print("Waiting for", training_file)
        time.sleep(1*60)
    print("Using Golden File:", training_file)

    try:
        save_file = os.path.join(fsdb.models_dir(), new_model_name)
        print("Training model")
        dual_net.train(training_file)
        print("Exporting model to ", save_file)
        dual_net.export_model(working_dir, save_file)
    except Exception:
        import traceback
        logging.error(traceback.format_exc())
        print(traceback.format_exc())
        logging.exception("Train error")
        sys.exit(1)
Example #7
def main(unusedargv):
    sgf_files = oneoff_utils.find_and_filter_sgf_files(FLAGS.sgf_dir,
                                                       FLAGS.min_year,
                                                       FLAGS.komi)
    pos_data, move_data, result_data, move_idxs = sample_positions_from_games(
        sgf_files=sgf_files, num_positions=FLAGS.num_positions)
    df = get_training_curve_data(fsdb.models_dir(), pos_data, move_data,
                                 result_data, FLAGS.idx_start,
                                 FLAGS.eval_every)
    save_plots(FLAGS.plot_dir, df)
Example #8
def main(unusedargv):
    model_paths = oneoff_utils.get_model_paths(fsdb.models_dir())

    # List vars constructed when using dual_net.
    dual_net_list(model_paths[0])

    # Calculate l2 cost over a sequence of our models.
    df = get_l2_cost_data(model_paths, FLAGS.idx_start, FLAGS.eval_every)
    print(df)
    save_plots(FLAGS.plot_dir, df)
Example #9
def backfill():
    models = [m[1] for m in fsdb.get_models()]

    import dual_net
    import tensorflow as tf
    from tqdm import tqdm
    features, labels = dual_net.get_inference_input()
    dual_net.model_fn(features, labels, tf.estimator.ModeKeys.PREDICT,
                      dual_net.get_default_hyperparams())

    for model_name in tqdm(models):
        if model_name.endswith('-upgrade'):
            continue
        try:
            load_file = os.path.join(fsdb.models_dir(), model_name)
            dest_file = os.path.join(fsdb.models_dir(), model_name)
            main.convert(load_file, dest_file)
        except Exception:
            print('failed on', model_name)
            continue
Example #10
def main():
    root = os.path.abspath(
        os.path.join("sgf", fsdb.FLAGS.bucket_name, "sgf/eval"))
    sync(root, True)
    models = fsdb.get_models()
    data = wins_subset(fsdb.models_dir())
    print(len(data))
    r = compute_ratings(data)
    for v, k in sorted([(v, k) for k, v in r.items()])[-20:][::-1]:
        print(models[model_num_for(k)][1], v)
    db = sqlite3.connect("ratings.db")
    print(db.execute("select count(*) from wins").fetchone()[0], "games")
    for m in models[-10:]:
        m_id = model_id(m[0])
        print(m[1], r.get(m_id, "model id not found({})".format(m_id)))
Example #11
def eval_policy(eval_positions):
    """Evaluate all positions with all models save the policy heatmaps as CSVs

    CSV name is "heatmap-<position_name>-<model-index>.csv"
    CSV format is: model number, value network output, policy network outputs

    position_name is taken from the SGF file
    Policy network outputs (19x19) are saved in flat order (see coord.from_flat)
    """

    model_paths = oneoff_utils.get_model_paths(fsdb.models_dir())

    idx_start = FLAGS.idx_start
    eval_every = FLAGS.eval_every

    print("Evaluating models {}-{}, eval_every={}".format(
        idx_start, len(model_paths), eval_every))

    player = None
    for i, idx in enumerate(
            tqdm(range(idx_start, len(model_paths), eval_every))):
        if player and i % 20 == 0:
            player.network.sess.close()
            tf.reset_default_graph()
            player = None

        if not player:
            player = oneoff_utils.load_player(model_paths[idx])
        else:
            oneoff_utils.restore_params(model_paths[idx], player)

        pos_names, positions = zip(*eval_positions)
        # This should be batched at some point.
        eval_probs, eval_values = player.network.run_many(positions)

        for pos_name, probs, value in zip(pos_names, eval_probs, eval_values):
            save_file = os.path.join(FLAGS.data_dir,
                                     "heatmap-{}-{}.csv".format(pos_name, idx))

            with open(save_file, "w") as data:
                data.write("{},  {},  {}\n".format(idx, value,
                                                   ",".join(map(str, probs))))
Example #12
def selfplay(verbose=2):
    _, model_name = fsdb.get_latest_model()
    games = gfile.Glob(os.path.join(fsdb.selfplay_dir(), model_name, '*.zz'))
    if len(games) > MAX_GAMES_PER_GENERATION:
        print("{} has enough games ({})".format(model_name, len(games)))
        time.sleep(10 * 60)
        sys.exit(1)
    print("Playing a game with model {}".format(model_name))
    model_save_path = os.path.join(fsdb.models_dir(), model_name)
    game_output_dir = os.path.join(fsdb.selfplay_dir(), model_name)
    game_holdout_dir = os.path.join(fsdb.holdout_dir(), model_name)
    sgf_dir = os.path.join(fsdb.sgf_dir(), model_name)
    main.selfplay(
        load_file=model_save_path,
        output_dir=game_output_dir,
        holdout_dir=game_holdout_dir,
        output_sgf=sgf_dir,
        holdout_pct=HOLDOUT_PCT,
        verbose=verbose,
    )
Example #13
def suggest_pairs(top_n=10, per_n=3):
    """ Find the maximally interesting pairs of players to match up
    First, sort the ratings by uncertainty.
    Then, take the ten highest players with the highest uncertainty
    For each of them, call them `p1`
    Sort all the models by their distance from p1's rating and take the 20
    nearest rated models. ('candidate_p2s')
    Choose pairings, (p1, p2), randomly from this list.

    'ratings' is a list of (model_num, rating, uncertainty) tuples

    Returns a list of *model numbers*, not model ids.
    """
    db = sqlite3.connect("ratings.db")
    data = db.execute("select model_winner, model_loser from wins").fetchall()
    bucket_ids = [
        id[0] for id in db.execute("select id from models where bucket = ?", (
            fsdb.models_dir(), )).fetchall()
    ]
    bucket_ids.sort()
    data = [d for d in data if d[0] in bucket_ids and d[1] in bucket_ids]

    ratings = [(model_num_for(k), v[0], v[1])
               for k, v in compute_ratings(data).items()]
    ratings.sort()
    # Filter off the first 100 models, which improve too fast.
    ratings = ratings[100:]

    ratings.sort(key=lambda r: r[2], reverse=True)

    res = []
    for p1 in ratings[:top_n]:
        candidate_p2s = sorted(ratings,
                               key=lambda p2_tup: abs(p1[1] - p2_tup[1]))[1:20]
        choices = random.sample(candidate_p2s, per_n)
        print("Pairing {}, sigma {:.2f}".format(p1[0], p1[2]))
        for p2 in choices:
            res.append([p1[0], p2[0]])
            print("   {}, ratings delta {:.2f}".format(p2[0],
                                                       abs(p1[1] - p2[1])))
    return res
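A hypothetical driver that feeds the suggested pairs into evaluation, assuming a helper like same_run_eval from Example #2 is importable:

pairs = suggest_pairs(top_n=5, per_n=2)
for black_num, white_num in pairs:
    # Each entry is a [p1_model_num, p2_model_num] pair of model numbers.
    same_run_eval(black_num, white_num)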
Example #14
def bootstrap(working_dir):
    bootstrap_name = shipname.generate(0)
    bootstrap_model_path = os.path.join(fsdb.models_dir(), bootstrap_name)
    print("Bootstrapping with working dir {}\n Model 0 exported to {}".format(
        working_dir, bootstrap_model_path))
    main.bootstrap(working_dir, bootstrap_model_path)
Example #15
def swa():
    path_base = fsdb.models_dir()
    model_names = [
        "000393-lincoln",
        "000390-indus",
        "000404-hannibal",
        "000447-hawke",
        "000426-grief",
        "000431-lion",
        "000428-invincible",
        "000303-olympus",
        "000291-superb",
        "000454-victorious",
    ]
    model_names = model_names[:FLAGS.count]

    model_paths = [os.path.join(path_base, m) for m in model_names]

    # construct the graph
    features, labels = dual_net.get_inference_input()
    dual_net.model_fn(features, labels, tf.estimator.ModeKeys.PREDICT)

    # restore all saved weights
    meta_graph_def = meta_graph.read_meta_graph_file(model_paths[0] + '.meta')
    stored_var_names = set([
        n.name for n in meta_graph_def.graph_def.node if n.op == 'VariableV2'
    ])

    var_list = [
        v for v in tf.global_variables() if v.op.name in stored_var_names
    ]
    var_list.sort(key=lambda v: v.op.name)

    print(stored_var_names)
    print(len(stored_var_names), len(var_list))

    sessions = [tf.Session() for _ in model_paths]
    saver = tf.train.Saver()
    for sess, model_path in zip(sessions, model_paths):
        saver.restore(sess, model_path)

    # Load all VariableV2s for each model.
    values = [sess.run(var_list) for sess in sessions]

    # Iterate over all variables, averaging values from all models.
    all_assign = []
    for var, vals in zip(var_list, zip(*values)):
        print("{}x {}".format(len(vals), var))
        if var.name == "global_step:0":
            # Don't average the step counter; keep the largest global_step.
            avg = vals[0]
            for val in vals:
                avg = tf.maximum(avg, val)
        else:
            avg = tf.add_n(vals) / len(vals)

        all_assign.append(tf.assign(var, avg))

    # Run all assign ops on an existing model (which has other ops and graph).
    sess = sessions[0]
    sess.run(all_assign)

    # Export a new saved model.
    ensure_dir_exists(FLAGS.data_dir)
    dest_path = os.path.join(FLAGS.data_dir, "swa-" + str(FLAGS.count))
    saver.save(sess, dest_path)
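The same averaging idea, detached from the TensorFlow session machinery, as a minimal NumPy sketch (the checkpoint dicts are hypothetical): element-wise mean for weights, max for the step counter.

import numpy as np

checkpoints = [
    {"conv1/w": np.ones((3, 3)), "global_step": np.array(390)},
    {"conv1/w": np.full((3, 3), 3.0), "global_step": np.array(454)},
]

averaged = {}
for name in checkpoints[0]:
    vals = [ckpt[name] for ckpt in checkpoints]
    if name == "global_step":
        averaged[name] = np.max(vals)           # keep the latest step
    else:
        averaged[name] = np.mean(vals, axis=0)  # element-wise mean

print(averaged["conv1/w"][0, 0], averaged["global_step"])  # 2.0 454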
Example #16
def top_n(n=10):
    data = wins_subset(fsdb.models_dir())
    r = compute_ratings(data)
    return [(model_num_for(k), v)
            for v, k in sorted([(v, k) for k, v in r.items()])[-n:][::-1]]
Example #17
def import_files(files, bucket=None):
    if bucket is None:
        bucket = fsdb.models_dir()

    print("Importing for bucket:", bucket)
    db = sqlite3.connect("ratings.db")
    new_games = 0
    with db:
        c = db.cursor()
        for _file in tqdm(files):
            match = re.match(EVAL_REGEX, os.path.basename(_file))
            if not match:
                print("Bad file: ", _file)
                continue
            timestamp = match.groups(1)[0]
            with open(_file) as f:
                text = f.read()
            pw = re.search(PW_REGEX, text)
            pb = re.search(PB_REGEX, text)
            result = re.search(RESULT_REGEX, text)
            if not (pw and pb and result):
                print("Fields not found: ", _file)
                continue

            pw = pw.group(1)
            pb = pb.group(1)
            result = result.group(1)

            m_num_w = re.match(MODEL_REGEX, pw).group(1)
            m_num_b = re.match(MODEL_REGEX, pb).group(1)
            # v10 and v9 have the same model and name for 588, so cross eval
            # games played with this model should be ignored.
            if m_num_w == '000588' or m_num_b == '000588':
                continue

            try:
                # create models or ignore.
                maybe_insert_model(db, bucket, pb, m_num_b)
                maybe_insert_model(db, bucket, pw, m_num_w)

                b_id = rowid_for(db, bucket, pb)
                w_id = rowid_for(db, bucket, pw)

                # insert into games or bail
                game_id = None
                try:
                    with db:
                        c = db.cursor()
                        c.execute(
                            """insert into games(timestamp, filename, b_id, w_id, black_won, result)
                                        values(?, ?, ?, ?, ?, ?)
                        """, [
                                timestamp,
                                os.path.relpath(_file), b_id, w_id,
                                result.lower().startswith('b'), result
                            ])
                        game_id = c.lastrowid
                except sqlite3.IntegrityError:
                    # print("Duplicate game: {}".format(_file))
                    continue

                if game_id is None:
                    print("Somehow, game_id was None")

                # update wins/game counts on model, and wins table.
                c.execute(
                    "update models set num_games = num_games + 1 where id in (?, ?)",
                    [b_id, w_id])
                if result.lower().startswith('b'):
                    c.execute(
                        "update models set black_games = black_games + 1, black_wins = black_wins + 1 where id = ?",
                        (b_id, ))
                    c.execute(
                        "update models set white_games = white_games + 1 where id = ?",
                        (w_id, ))
                    c.execute(
                        "insert into wins(game_id, model_winner, model_loser) values(?, ?, ?)",
                        [game_id, b_id, w_id])
                elif result.lower().startswith('w'):
                    c.execute(
                        "update models set black_games = black_games + 1 where id = ?",
                        (b_id, ))
                    c.execute(
                        "update models set white_games = white_games + 1, white_wins = white_wins + 1 where id = ?",
                        (w_id, ))
                    c.execute(
                        "insert into wins(game_id, model_winner, model_loser) values(?, ?, ?)",
                        [game_id, w_id, b_id])
                new_games += 1
                if new_games % 1000 == 0:
                    print("committing", new_games)
                    db.commit()
            except:
                print("Bailed!")
                db.rollback()
                raise
        print("Added {} new games to database".format(new_games))
Example #18
def model_id(name_or_num):
    db = sqlite3.connect("ratings.db")
    bucket = fsdb.models_dir()
    if not isinstance(name_or_num, str):
        name_or_num = fsdb.get_model(name_or_num)
    return rowid_for(db, bucket, name_or_num)