Example #1
def eval_pv(eval_positions):
    model_paths = oneoff_utils.get_model_paths(fsdb.models_dir())

    idx_start = FLAGS.idx_start
    eval_every = FLAGS.eval_every

    print("Evaluating models {}-{}, eval_every={}".format(
        idx_start, len(model_paths), eval_every))
    for idx in tqdm(range(idx_start, len(model_paths), eval_every)):
        if idx == idx_start:
            player = oneoff_utils.load_player(model_paths[idx])
        else:
            oneoff_utils.restore_params(model_paths[idx], player)

        mcts = strategies.MCTSPlayer(player.network, resign_threshold=-1)

        for name, position in eval_positions:
            mcts.initialize_game(position)
            mcts.suggest_move(position)

            # Follow the most-visited child at each node to extract the
            # principal variation as (move, visit count) pairs.
            path = []
            node = mcts.root
            while node.children:
                node = node.children.get(node.best_child())
                path.append("{},{}".format(node.fmove, int(node.N)))

            save_file = os.path.join(FLAGS.data_dir,
                                     "pv-{}-{}".format(name, idx))
            with open(save_file, "w") as data:
                data.write("{},  {}\n".format(idx, ",".join(path)))
Example #2
def get_training_curve_data(model_dir, pos_data, move_data, result_data,
                            idx_start, eval_every):
    model_paths = oneoff_utils.get_model_paths(model_dir)
    df = pd.DataFrame()
    player = None

    print("Evaluating models {}-{}, eval_every={}".format(
        idx_start, len(model_paths), eval_every))
    for idx in tqdm(range(idx_start, len(model_paths), eval_every)):
        if player:
            oneoff_utils.restore_params(model_paths[idx], player)
        else:
            player = oneoff_utils.load_player(model_paths[idx])

        correct, squared_errors = eval_player(player=player,
                                              positions=pos_data,
                                              moves=move_data,
                                              results=result_data)

        avg_acc = np.mean(correct)
        avg_mse = np.mean(squared_errors)
        print("Model: {}, acc: {:.4f}, mse: {:.4f}".format(
            model_paths[idx], avg_acc, avg_mse))
        df = df.append(
            {"num": idx, "acc": avg_acc, "mse": avg_mse},
            ignore_index=True)
    return df
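
Because the result is a plain DataFrame, plotting the training curve is straightforward. A minimal sketch, assuming matplotlib and caller-prepared pos_data, move_data, and result_data:

import matplotlib.pyplot as plt

df = get_training_curve_data("models/", pos_data, move_data, result_data,
                             idx_start=0, eval_every=5)
fig, (ax1, ax2) = plt.subplots(2, sharex=True)
ax1.plot(df["num"], df["acc"])
ax1.set_ylabel("move accuracy")
ax2.plot(df["num"], df["mse"])
ax2.set_ylabel("value MSE")
ax2.set_xlabel("model index")
fig.savefig("training_curve.png")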
Example #3
def eval_for_policy(eval_positions, model_dir, data_dir, idx_start, eval_every):
    """Evaluate all positions with all models save the policy heatmaps as CSVs

    CSV name is "heatmap-<position_name>-<model-index>.csv"
    CSV format is: model number, value network output, policy network outputs

    position_name is taken from the SGF file
    Policy network outputs (19x19) are saved in flat order (see coord.from_flat)
    """

    model_paths = oneoff_utils.get_model_paths(model_dir)

    print("Evaluating models {}-{}, eval_every={}".format(
          idx_start, len(model_paths), eval_every))
    for idx in tqdm(range(idx_start, len(model_paths), eval_every)):
        if idx == idx_start:
            player = oneoff_utils.load_player(model_paths[idx])
        else:
            oneoff_utils.restore_params(model_paths[idx], player)

        pos_names, positions = zip(*eval_positions)
        # This should be batched at some point.
        eval_probs, eval_values = player.network.run_many(positions)

        for pos_name, probs, value in zip(pos_names, eval_probs, eval_values):
            save_file = os.path.join(
                data_dir, "heatmap-{}-{}.csv".format(pos_name, idx))

            with open(save_file, "w") as data:
                data.write("{},  {},  {}\n".format(
                    idx, value, ",".join(map(str, probs))))
Example #4
def eval_policy(eval_positions):
    """Evaluate all positions with all models save the policy heatmaps as CSVs

    CSV name is "heatmap-<position_name>-<model-index>.csv"
    CSV format is: model number, value network output, policy network outputs

    position_name is taken from the SGF file
    Policy network outputs (19x19) are saved in flat order (see coord.from_flat)
    """

    model_paths = oneoff_utils.get_model_paths(fsdb.models_dir())

    idx_start = FLAGS.idx_start
    eval_every = FLAGS.eval_every

    print("Evaluating models {}-{}, eval_every={}".format(
        idx_start, len(model_paths), eval_every))

    player = None
    for i, idx in enumerate(
            tqdm(range(idx_start, len(model_paths), eval_every))):
        # Periodically close the session and reset the TF1 default graph;
        # otherwise repeated restore_params calls keep adding ops to the
        # graph and memory grows over the run.
        if player and i % 20 == 0:
            player.network.sess.close()
            tf.reset_default_graph()
            player = None

        if not player:
            player = oneoff_utils.load_player(model_paths[idx])
        else:
            oneoff_utils.restore_params(model_paths[idx], player)

        pos_names, positions = zip(*eval_positions)
        # This should be batched at some point.
        eval_probs, eval_values = player.network.run_many(positions)

        for pos_name, probs, value in zip(pos_names, eval_probs, eval_values):
            save_file = os.path.join(FLAGS.data_dir,
                                     "heatmap-{}-{}.csv".format(pos_name, idx))

            with open(save_file, "w") as data:
                data.write("{},  {},  {}\n".format(idx, value,
                                                   ",".join(map(str, probs))))
Example #5
def eval_policy(eval_positions):
    """Evaluate all positions with all models; save the policy heatmaps as CSVs.

    CSV name is "heatmap-<position_name>-<model-index>.csv"
    CSV format is: model number, value network output, policy network outputs

    position_name is taken from the SGF file
    Policy network outputs (19x19) are saved in flat order (see coord.from_flat)
    """

    model_paths = oneoff_utils.get_model_paths(FLAGS.model_dir)

    idx_start = FLAGS.idx_start
    eval_every = FLAGS.eval_every

    print("Evaluating models {}-{}, eval_every={}".format(
          idx_start, len(model_paths), eval_every))

    for idx in tqdm(range(idx_start, len(model_paths), eval_every)):
        if idx == idx_start:
            player = oneoff_utils.load_player(model_paths[idx])
        else:
            oneoff_utils.restore_params(model_paths[idx], player)

        pos_names, positions = zip(*eval_positions)
        # This should be batched at some point.
        eval_probs, eval_values = player.network.run_many(positions)

        for pos_name, probs, value in zip(pos_names, eval_probs, eval_values):
            save_file = os.path.join(
                FLAGS.data_dir, "heatmap-{}-{}.csv".format(pos_name, idx))

            with open(save_file, "w") as data:
                data.write("{},  {},  {}\n".format(
                    idx, value, ",".join(map(str, probs))))
def evaluate():
    """Get Policy and Value for each network, for each position

    Usage:
        python3 sharp_positions.py evaluate --sgf_dir data/s --model_dir models/
    """
    def short_str(v):
        if isinstance(v, float):
            return "{.3f}".format(v)
        return str(v)

    # Load positions
    sgf_names, all_positions = get_final_positions()

    # Run and save some data about each position
    # Save to csv because that's easy
    model_paths = oneoff_utils.get_model_paths(FLAGS.model_dir)
    num_models = len(model_paths)
    print("Evaluating {} models: {} to {}".format(num_models, model_paths[0],
                                                  model_paths[-1]))
    print()

    with open(FLAGS.results, "w") as results:
        results.write(",".join(sgf_names) + "\n")

        player = None
        for idx in tqdm(range(FLAGS.min_idx, num_models, 1), desc="model"):
            model = model_paths[idx]

            # Same trick as above: periodically rebuild the session so the
            # TF1 graph doesn't grow with every restore.
            if player and idx % 50 == 0:
                player.network.sess.close()
                tf.reset_default_graph()
                player = None

            if player:
                oneoff_utils.restore_params(model, player)
            else:
                player = oneoff_utils.load_player(model)

            row = [model]
            for positions in grouper(FLAGS.batch_size, all_positions):
                probs, values = player.network.run_many(positions)
                # NOTE(sethtroisi): For now we store the top n moves to shrink
                # the size of the recorded data.

                top_n = FLAGS.top_n
                top_policy_move = np.fliplr(np.argsort(probs))[:, :top_n]
                top_policy_value = np.fliplr(np.sort(probs))[:, :top_n]

                # One position at a time
                for v, m, p in zip(values, top_policy_move, top_policy_value):
                    row.append(v)
                    row.extend(itertools.chain.from_iterable(zip(m, p)))

                # Sanity check: warn when the top_n moves capture little of
                # the total policy mass for this batch.
                if len(positions) > 10:
                    average_seen = top_policy_value.sum() / len(positions)
                    if average_seen < 0.3:
                        print("\t", average_seen,
                              top_policy_value.sum(axis=-1))

            results.write(",".join(map(short_str, row)) + "\n")