Exemplo n.º 1
0
def self_play(tracker_queue,
              net,
              replay_queue,
              probs_queue,
              loop_count,
              device="cpu"):
    """Play SELF_PLAY_PERIOD games of ``net`` against itself and report speed.

    The same pair of MCTS stores is reused for every game of this call.
    After each game a ``("speed_steps", value, episode)`` tuple is put on
    ``tracker_queue`` and a one-line summary is printed.

    Args:
        tracker_queue: Object with a ``put`` method that receives the
            per-episode speed records.
        net: Network passed to ``model.play_game`` for both players.
        replay_queue: Forwarded unchanged to ``model.play_game``.
        probs_queue: Forwarded unchanged to ``model.play_game``.
        loop_count: Index of this call; multiplied by SELF_PLAY_PERIOD to
            offset the episode number.
        device: Device identifier forwarded to ``model.play_game``.
    """
    stores = [mcts.MCTS(), mcts.MCTS()]
    first_episode = loop_count * SELF_PLAY_PERIOD

    for offset in range(SELF_PLAY_PERIOD):
        episode = first_episode + offset
        started = time.perf_counter()
        _, steps = model.play_game(
            stores,
            replay_queue,
            probs_queue,
            net,
            net,
            steps_before_tau_0=STEPS_BEFORE_TAU_0,
            mcts_searches=MCTS_SEARCHES,
            mcts_batch_size=MCTS_BATCH_SIZE,
            device=device,
            status="",
        )
        elapsed = time.perf_counter() - started
        steps_per_second = steps / elapsed

        # NOTE(review): this summary is rebuilt after every game but never
        # consumed -- play_game always receives an empty status string.
        # Confirm whether the previous episode's status was meant to be
        # passed to the next game.
        status = "episode #{}, steps {:3d}, processing time {:5.2f} [s], steps/s {:5.2f}".format(
            episode, steps, elapsed, steps_per_second)

        tracker_queue.put(("speed_steps", steps_per_second, episode))
        print("episode #%d, steps %3d, steps/s %5.2f" %
              (episode, steps, steps_per_second))
Exemplo n.º 2
0
def play(val, lock, mcts_store, net, best_idx, username, device, step_idx):
    """Play self-play games in a loop until the shared stop flag is cleared.

    Each iteration plays one game of ``net`` against itself through
    ``model.play_game``.  A game that produced at least one step increments
    the shared counter ``val[1]`` and prints a speed summary.  The loop ends
    after the first game at whose end ``val[0] <= 0``.

    Args:
        val: Shared indexable state.  ``val[0]`` is read as the keep-running
            flag (stop when ``<= 0``); ``val[1]`` counts games with steps.
            It is also forwarded to ``model.play_game``.
        lock: Lock guarding ``val``; must support the ``with`` statement.
        mcts_store: MCTS store forwarded to ``model.play_game``; its length
            is reported as the number of leaves.
        net: Network used for both players.
        best_idx: Forwarded to ``model.play_game`` and echoed in the log.
        username: Forwarded to ``model.play_game``.
        device: Device identifier forwarded to ``model.play_game``.
        step_idx: Offset added to the shared game counter in the log line.
    """
    while True:
        t = time.time()
        _, game_steps = model.play_game(val,
                                        mcts_store,
                                        None,
                                        net,
                                        net,
                                        steps_before_tau_0=STEPS_BEFORE_TAU_0,
                                        mcts_searches=MCTS_SEARCHES,
                                        mcts_batch_size=MCTS_BATCH_SIZE,
                                        best_idx=best_idx,
                                        url=URL,
                                        username=username,
                                        device=device)
        game_nodes = len(mcts_store)
        dt = time.time() - t
        # time.time() can have coarse resolution; a game that returns
        # immediately must not crash the worker with a ZeroDivisionError.
        speed_steps = game_steps / dt if dt > 0 else 0.0
        speed_nodes = game_nodes / dt if dt > 0 else 0.0

        # The context manager releases the lock even if updating ``val``
        # raises.  The counter is snapshotted while the lock is held so the
        # log line reports this worker's own increment, not a later one.
        with lock:
            if game_steps > 0:
                val[1] += 1
            games_played = val[1]
            stop_requested = val[0] <= 0

        if game_steps > 0:
            print(
                "Step %d, steps %3d, leaves %4d, steps/s %5.2f, leaves/s %6.2f, best_idx %d"
                % (step_idx + games_played, game_steps, game_nodes,
                   speed_steps, speed_nodes, best_idx))
        if stop_requested:
            break
Exemplo n.º 3
0
def evaluate(net1, net2, rounds, device="cpu"):
    """Play ``rounds`` games of net1 vs net2 and return net1's win ratio.

    A result above 0.5 is counted as a win for ``net1`` and a result below
    -0.5 as a win for ``net2``; anything in between is ignored.

    Args:
        net1: Network passed to ``model.play_game`` as ``net1``.
        net2: Network passed to ``model.play_game`` as ``net2``.
        rounds: Number of games to play.
        device: Device identifier forwarded to ``model.play_game``.

    Returns:
        ``n1_win / (n1_win + n2_win)`` over the decisive games, or 0.5 when
        no game was decisive (all games undecided, or ``rounds`` is 0).
    """
    n1_win, n2_win = 0, 0
    mcts_stores = [mcts.MCTS(), mcts.MCTS()]

    for _ in range(rounds):
        r, _ = model.play_game(mcts_stores=mcts_stores, replay_buffer=None, net1=net1, net2=net2,
                               steps_before_tau_0=0, mcts_searches=20, mcts_batch_size=16,
                               device=device)
        if r < -0.5:
            n2_win += 1
        elif r > 0.5:
            n1_win += 1

    decisive = n1_win + n2_win
    if decisive == 0:
        # Without any decisive game the ratio is undefined; report an even
        # score instead of raising ZeroDivisionError.
        return 0.5
    return n1_win / decisive
Exemplo n.º 4
0
def eval(val, lock, net1, net2, device, cpuf):
    """Repeatedly play net1 against net2 and tally results in ``val``.

    Before each game the order in which the two networks are handed to
    ``model.play_game`` is chosen at random.  For every game that returns a
    result, ``val[1]`` is incremented when the result favours ``net1`` after
    accounting for the swap; otherwise ``val[2]`` is incremented.  The loop
    ends after the first game at whose end ``val[0] <= 0``.

    Args:
        val: Shared indexable state: ``val[0]`` keep-running flag (stop when
            ``<= 0``), ``val[1]`` and ``val[2]`` result counters.  It is
            also forwarded to ``model.play_game``.
        lock: Lock guarding ``val``; must support the ``with`` statement.
        net1: First network under evaluation.
        net2: Second network under evaluation.
        device: Device identifier forwarded to ``model.play_game``.
        cpuf: When truthy, ``.to(device)`` is called on both networks first.
    """
    if cpuf:
        net1.to(device)
        net2.to(device)
    mcts_stores = [mcts.MCTS(), mcts.MCTS()]
    while True:
        # are == 0 keeps the (net1, net2) order, are == 1 swaps the two.
        are = random.randrange(0, 2)
        swapped = are >= 1
        first, second = (net2, net1) if swapped else (net1, net2)
        r, _ = model.play_game(val, mcts_stores, None, net1=first,
                               net2=second, steps_before_tau_0=20,
                               mcts_searches=40, mcts_batch_size=40,
                               best_idx=-1, device=device)

        # The context manager releases the lock even if the bookkeeping or
        # the print below raises.
        with lock:
            if r is not None:
                # Presumably r > 0.5 means the network passed as play_game's
                # net1 won -- TODO confirm against model.play_game.
                favours_net1 = (r > 0.5 and not swapped) or (r < -0.5 and swapped)
                # NOTE(review): val[2] receives every other finished game,
                # i.e. results between -0.5 and 0.5 are counted there too.
                val[1 if favours_net1 else 2] += 1
                print("%d:%d %d/%d" % (are, r, val[1], val[2]), end=' ', flush=True)
                # Break the progress line after every fifth counted game.
                if (val[1] + val[2]) % 5 < 1:
                    print()
            stop_requested = val[0] <= 0
        if stop_requested:
            break
Exemplo n.º 5
0
def evaluate(net1, net2, rounds, device="cpu"):
    """Score net1 against net2 over ``rounds`` games with swapped halves.

    For the first ``rounds // 2`` games the networks are passed to
    ``model.play_game`` as (net1, net2); for the remaining games the order
    is reversed.  A game is credited to ``net1`` when the result is positive
    in the first half or negative in the second half.  Each game's index,
    result and step count are printed.

    Args:
        net1: Network being scored.
        net2: Opposing network.
        rounds: Total number of games to play.
        device: Device identifier forwarded to ``model.play_game``.

    Returns:
        Games credited to ``net1`` divided by the number of games with a
        non-zero result, or 0.5 when every result was zero.
    """
    stores = [mcts.MCTS(), mcts.MCTS()]
    half = rounds // 2
    net1_credits = 0
    decisive_games = 0

    for game_no in range(rounds):
        original_order = game_no < half
        if original_order:
            first, second = net1, net2
        else:
            first, second = net2, net1

        r, step = model.play_game(
            None,
            stores,
            None,
            first,
            second,
            steps_before_tau_0=game.MAX_TURN,
            mcts_searches=40,
            mcts_batch_size=40,
            best_idx=-1,
            device=device,
        )

        # The sign of the result is relative to the order the networks were
        # passed in, so it flips for the second half of the games.
        if original_order:
            credited = r > 0
        else:
            credited = r < 0
        if credited:
            net1_credits += 1
        if r != 0:
            decisive_games += 1
        print(game_no, r, step)

    if decisive_games > 0:
        return net1_credits / decisive_games
    return 0.5
Exemplo n.º 6
0
    optimizer = optim.SGD(net.parameters(), lr=LEARNING_RATE, momentum=0.9)

    replay_buffer = collections.deque(maxlen=REPLAY_BUFFER)
    mcts_store = mcts.MCTS()
    step_idx = 0
    best_idx = 0

    with ptan.common.utils.TBMeanTracker(writer, batch_size=10) as tb_tracker:
        while True:
            t = time.time()
            prev_nodes = len(mcts_store)
            game_steps = 0
            for _ in range(PLAY_EPISODES):
                _, steps = model.play_game(mcts_store, replay_buffer, best_net.target_model, best_net.target_model,
                                           steps_before_tau_0=STEPS_BEFORE_TAU_0, mcts_searches=MCTS_SEARCHES,
                                           mcts_batch_size=MCTS_BATCH_SIZE, device=device)
                game_steps += steps
            game_nodes = len(mcts_store) - prev_nodes
            dt = time.time() - t
            speed_steps = game_steps / dt
            speed_nodes = game_nodes / dt
            tb_tracker.track("speed_steps", speed_steps, step_idx)
            tb_tracker.track("speed_nodes", speed_nodes, step_idx)
            print("Step %d, steps %3d, leaves %4d, steps/s %5.2f, leaves/s %6.2f, best_idx %d, replay %d" % (
                step_idx, game_steps, game_nodes, speed_steps, speed_nodes, best_idx, len(replay_buffer)))
            step_idx += 1

            if len(replay_buffer) < MIN_REPLAY_TO_TRAIN:
                continue
        nets.append((fname, net))

    total_agent = {}
    total_pairs = {}

    for idx1, n1 in enumerate(nets):
        for idx2, n2 in enumerate(nets):
            if idx1 == idx2:
                continue
            wins, losses, draws = 0, 0, 0
            ts = time.time()
            for _ in range(args.rounds):
                r, _ = model.play_game(mcts_stores=None,
                                       replay_buffer=None,
                                       net1=n1[1],
                                       net2=n2[1],
                                       steps_before_tau_0=0,
                                       mcts_searches=MCTS_SEARCHES,
                                       mcts_batch_size=MCTS_BATCH_SIZE,
                                       device=device)
                if r > 0.5:
                    wins += 1
                elif r < -0.5:
                    losses += 1
                else:
                    draws += 1
            speed_games = args.rounds / (time.time() - ts)
            name_1, name_2 = n1[0], n2[0]
            print("%s vs %s -> w=%d, l=%d, d=%d" %
                  (name_1, name_2, wins, losses, draws))
            sys.stderr.write("Speed %.2f games/s\n" % speed_games)
            sys.stdout.flush()
Exemplo n.º 8
0
                        actions_n=actionTable.AllMoveLength).to(device)
        net.load_state_dict(checkpoint['model'], strict=False)
        net.eval()
        net.share_memory()

        if os.name == 'nt' and args.cuda:
            mcts_store = mcts.MCTS()
            for i in range(PLAY_EPISODE):
                t = time.time()
                _, game_steps = model.play_game(
                    None,
                    mcts_store,
                    None,
                    net,
                    net,
                    steps_before_tau_0=STEPS_BEFORE_TAU_0,
                    mcts_searches=MCTS_SEARCHES,
                    mcts_batch_size=MCTS_BATCH_SIZE,
                    best_idx=best_idx,
                    url=URL,
                    username=username,
                    device=device)
                game_nodes = len(mcts_store)
                dt = time.time() - t
                speed_steps = game_steps / dt
                speed_nodes = game_nodes / dt
                step_idx += 1
                print(
                    "Step %d, steps %3d, leaves %4d, steps/s %5.2f, leaves/s %6.2f, best_idx %d"
                    % (step_idx, game_steps, game_nodes, speed_steps,
                       speed_nodes, best_idx))
Exemplo n.º 9
0
    total_agent = {}
    total_pairs = {}

    for idx1, n1 in enumerate(nets):
        for idx2, n2 in enumerate(nets):
            if idx1 == idx2:
                continue
            wins, losses, draws = 0, 0, 0
            ts = time.time()
            for _ in range(args.rounds):
                r, _ = model.play_game(None,
                                       None,
                                       queue=None,
                                       net1=n1[1],
                                       net2=n2[1],
                                       steps_before_tau_0=game.MAX_TURN,
                                       mcts_searches=MCTS_SEARCHES,
                                       mcts_batch_size=MCTS_BATCH_SIZE,
                                       best_idx=-1,
                                       device=device)
                print(r)
                if r > 0.5:
                    wins += 1
                elif r < -0.5:
                    losses += 1
                else:
                    draws += 1
            speed_games = args.rounds / (time.time() - ts)
            name_1, name_2 = n1[0], n2[0]
            print("%s vs %s -> w=%d, l=%d, d=%d" %
                  (name_1, name_2, wins, losses, draws))
        net = model.Net(model.OBS_SHAPE, game.GAME_COLS)
        net.load_state_dict(torch.load(fname, map_location=lambda storage, loc: storage))
        net = net.to(device)
        nets.append((fname, net))

    total_agent = {}
    total_pairs = {}

    for idx1, n1 in enumerate(nets):
        for idx2, n2 in enumerate(nets):
            if idx1 == idx2:
                continue
            wins, losses, draws = 0, 0, 0
            ts = time.time()
            for _ in range(args.rounds):
                r, _ = model.play_game(mcts_stores=None, replay_buffer=None, net1=n1[1], net2=n2[1], steps_before_tau_0=0,
                                    mcts_searches=MCTS_SEARCHES, mcts_batch_size=MCTS_BATCH_SIZE, device=device)
                if r > 0.5:
                    wins += 1
                elif r < -0.5:
                    losses += 1
                else:
                    draws += 1
            speed_games = args.rounds / (time.time() - ts)
            name_1, name_2 = n1[0], n2[0]
            print("%s vs %s -> w=%d, l=%d, d=%d" % (name_1, name_2, wins, losses, draws))
            sys.stderr.write("Speed %.2f games/s\n" % speed_games)
            sys.stdout.flush()
            game.update_counts(total_agent, name_1, (wins, losses, draws))
            game.update_counts(total_agent, name_2, (losses, wins, draws))
            game.update_counts(total_pairs, (name_1, name_2), (wins, losses, draws))