Пример #1
0
def test(run, snapshot=-1, **kwargs):
    """Pit an MCTS agent against a dummy agent built from the same snapshot.

    Both agents get their own network instance restored from snapshot
    `snapshot` of `run`. They play 1024 parallel Hex games twice, swapping
    seats between the two rollouts.

    Args:
        run: run identifier understood by `runs` and `storage`.
        snapshot: snapshot index passed to `storage.load_snapshot` as `n`.
        **kwargs: forwarded unchanged to `mcts.MCTSAgent`.

    Returns:
        A dict of Python floats:
          'elo': log-odds of the MCTS agent's win rate, log(r) - log(1 - r).
          'kl':  `kl_div` of the '0' decisions, averaged over both rollouts.
          'ent': `rel_entropy` of the '0' decisions, averaged likewise.
    """
    def restore_network():
        # Each agent gets a freshly loaded module, so A and B never share one.
        net = storage.load_raw(run, 'model')
        state = storage.load_snapshot(run, n=snapshot)['agent']
        net.load_state_dict(storage.expand(state)['network'])
        return net.cuda()

    size = runs.info(run)['boardsize']
    worlds = hex.Hex.initial(n_envs=1024, boardsize=size)

    A = mcts.MCTSAgent(restore_network(), **kwargs)
    B = mcts.DummyAgent(restore_network())

    # Same starting worlds, seats swapped between the two rollouts.
    fst = analysis.rollout(worlds, [A, B], n_reps=1, eval=False)
    snd = analysis.rollout(worlds, [B, A], n_reps=1, eval=False)

    # The second tally is flipped along its first axis before summing --
    # presumably so that row 0 refers to A in both rollouts; TODO confirm
    # against count_wins.
    fst_wins = count_wins(fst.transitions)
    snd_wins = count_wins(snd.transitions).flipud()
    wins = fst_wins + snd_wins

    rate = wins[0] / wins.sum()
    elo = torch.log(rate) - torch.log(1 - rate)

    # NOTE(review): key '0' is assumed to index the first seat's decisions.
    kl_total = kl_div(fst.decisions['0']) + kl_div(snd.decisions['0'])
    ent_total = rel_entropy(fst.decisions['0']) + rel_entropy(snd.decisions['0'])

    return {
        'elo': elo.item(),
        'kl': (kl_total / 2).item(),
        'ent': (ent_total / 2).item(),
    }
Пример #2
0
def snapshot_kl_divs(run):
    """Compare every pair of a run's snapshots by the divergence of their logits.

    The run's model is evaluated on one fixed batch of 16*1024 initial Hex
    worlds (passed through `mix`) once per snapshot. For each ordered pair
    (i, j) the quantity sum(exp(l_i) * (l_i - l_j)) over the last axis is
    averaged over the batch. This is KL(p_i || p_j) if `.logits` holds
    normalised log-probabilities -- TODO confirm.

    Args:
        run: run identifier understood by `pavlov.runs` / `pavlov.storage`.

    Returns:
        A pandas DataFrame with snapshot index i on the rows and j on the
        columns, holding the batch-mean divergence for each pair.
    """
    import pandas as pd
    from pavlov import runs, storage
    from boardlaw import hex
    from boardlaw.main import mix
    import torch
    from tqdm.auto import tqdm

    def mean_divergence(lp, lq):
        pointwise = -lp.exp().mul(lq - lp)
        # Non-finite terms (e.g. arising from infinite logits) are zeroed
        # rather than allowed to poison the sum.
        finite = torch.isfinite(pointwise)
        cleaned = pointwise.where(finite, torch.zeros_like(pointwise))
        return cleaned.sum(-1).mean().item()

    model = storage.load_raw(run, 'model')
    boardsize = runs.info(run)['params']['boardsize']
    worlds = mix(hex.Hex.initial(n_envs=16*1024, boardsize=boardsize))

    # One forward pass per snapshot, all on the same batch of worlds.
    logits = {}
    for idx in tqdm(storage.snapshots(run)):
        state = storage.load_snapshot(run, idx)['agent']
        model.load_state_dict(storage.expand(state)['network'])
        logits[idx] = model(worlds).logits.detach()

    kldivs = {
        (i, j): mean_divergence(li, lj)
        for i, li in logits.items()
        for j, lj in logits.items()
    }

    # Keys are (i, j) tuples; unstack moves j into the columns.
    return pd.Series(kldivs).unstack()
Пример #3
0
def load(run):
    """Build initial Hex worlds and an MCTS agent restored from a run.

    Args:
        run: run identifier understood by `runs` and `storage`.

    Returns:
        A `(worlds, agent)` tuple: `worlds` comes from `hex.Hex.initial(9, ...)`
        at the run's board size, and `agent` is an `mcts.MCTSAgent` whose
        state was loaded from the run's latest stored 'agent' entry.
    """
    # NOTE(review): `mohex` is not used below; the import is kept as-is in
    # case it is relied on for side effects.
    from boardlaw import mohex, hex, mcts

    params = runs.info(run)['params']
    worlds = hex.Hex.initial(9, boardsize=params['boardsize'])

    agent = mcts.MCTSAgent(storage.load_raw(run, 'model'))
    latest = storage.load_latest(run)
    agent.load_state_dict(latest['agent'])

    return worlds, agent
Пример #4
0
def rollout_model(run=-1, mohex=True, eval=True, n_envs=1):
    """Roll out a run's MCTS agent against MoHex or against itself.

    Args:
        run: run identifier understood by `runs` and `storage`.
        mohex: if truthy, the opponent is `MoHexAgent(solver=True)`;
            otherwise the agent plays both seats.
        eval: forwarded to `rollout` as its `eval` argument.
        n_envs: number of parallel Hex worlds to create.

    Returns:
        Whatever `rollout(worlds, agents, n_reps=1, eval=eval)` returns.
    """
    from boardlaw import mcts, hex

    boardsize = runs.info(run)['boardsize']
    worlds = hex.Hex.initial(n_envs=n_envs, boardsize=boardsize)

    # NOTE(review): no snapshot/state dict is loaded here; the agent uses
    # whatever `storage.load_raw` returns -- confirm that is intended.
    agent = mcts.MCTSAgent(storage.load_raw(run, 'model'), n_nodes=64)

    if not mohex:
        return rollout(worlds, [agent, agent], n_reps=1, eval=eval)

    # Imported lazily, only when the MoHex opponent is requested. Aliased so
    # the module does not rebind the `mohex` flag.
    from boardlaw import mohex as mohex_module
    opponent = mohex_module.MoHexAgent(solver=True)
    return rollout(worlds, [agent, opponent], n_reps=1, eval=eval)
Пример #5
0
def generate_trained(run, n_envs=32 * 1024, device='cuda'):
    """Yield whatever `generate` produces for a run's latest trained agent.

    This is a generator function: nothing is loaded until the first item is
    requested.

    Args:
        run: run identifier understood by `runs` and `storage`.
        n_envs: number of parallel Hex worlds to create.
        device: device for the worlds, the network and the loaded
            checkpoint. The default, 'cuda', matches the previous behaviour.

    Yields:
        The items yielded by `generate(agent, worlds)`.
    """
    # TODO: Restore league and sched when you go back to large boards
    boardsize = runs.info(run)['boardsize']
    worlds = mix(Hex.initial(n_envs, boardsize=boardsize, device=device))

    # Honour `device` instead of hard-coding CUDA, so the network ends up on
    # the same device as the worlds it will be evaluated on.
    network = storage.load_raw(run, 'model').to(device)
    agent = MCTSAgent(network)

    # Load the latest checkpoint once. The original read it a second time
    # (without a device) and loaded the same state dict again, which only
    # doubled the storage I/O.
    agent.load_state_dict(storage.load_latest(run, device)['agent'])

    yield from generate(agent, worlds)
Пример #6
0
def load_field(*args, key=('boardsize', 'width', 'depth')):
    """Load one stat for every 'main/' run and average runs sharing parameters.

    Runs are those whose description starts with 'main/'. For each,
    `stats.pandas(r, *args)` supplies the data and the run's
    `JITTENS_PARAMS` environment entry (a Python-literal dict) supplies the
    parameter values named in `key`. Runs that fail to load are logged and
    skipped.

    Args:
        *args: forwarded to `stats.pandas` after the run identifier.
        key: names of the parameters that identify a configuration. Any
            length is accepted, not only three.

    Returns:
        A DataFrame with one column per distinct `key` combination (column
        levels named after `key`), each holding the mean over the runs
        sharing that combination.
    """
    rs = runs.pandas().loc[lambda df: df.description.fillna('').str.startswith('main/')].index

    head, tail = [], []
    for r in rs:
        try:
            data = stats.pandas(r, *args)
            params = ast.literal_eval(runs.info(r)['_env']['JITTENS_PARAMS'])
            label = tuple(params[f] for f in key)
        except Exception as e:
            # Best-effort: a run with missing stats or parameters is skipped.
            log.info(f'Failed to load {r}: {e}')
        else:
            # Append both only after everything succeeded, so `head` and
            # `tail` stay aligned even when parameter parsing fails after
            # the stats were already loaded.
            tail.append(data)
            head.append(label)

    df = pd.DataFrame(tail, index=pd.MultiIndex.from_tuples(head)).T.sort_index(axis=1)
    df.columns.names = key

    # Average columns with identical parameter tuples. `DataFrame.mean` no
    # longer accepts `level=` (removed in pandas 2.0), so group the
    # transposed frame instead. The level list follows `len(key)` rather
    # than assuming three levels. sort=False mirrors the old level-wise
    # reduction; the columns are already sorted above.
    levels = list(range(len(key)))
    return df.T.groupby(level=levels, sort=False).mean().T
Пример #7
0
def worlds(run, n_envs, device='cpu'):
    """Create initial Hex worlds at the board size recorded for `run`.

    Args:
        run: run identifier understood by `runs.info`.
        n_envs: first positional argument to `Hex.initial`.
        device: third positional argument to `Hex.initial`.

    Returns:
        The result of `Hex.initial(n_envs, boardsize, device)`.
    """
    params = runs.info(run)['params']
    return Hex.initial(n_envs, params['boardsize'], device)