def test(run, snapshot=-1, **kwargs):
    """Evaluate one snapshot of `run` by playing its MCTS agent against a
    dummy agent built from the same weights.

    Two batches are rolled out with the seats swapped, the win counts are
    combined into a log-odds elo estimate, and the first seat's decision
    statistics (KL divergence and relative entropy) are averaged across the
    two orderings. Extra keyword arguments are forwarded to `MCTSAgent`.

    Returns a dict with float entries 'elo', 'kl' and 'ent'.
    """
    def _snapshot_network():
        # Each agent gets its own freshly loaded copy of the snapshot weights,
        # mirroring the original's two independent load sequences.
        net = storage.load_raw(run, 'model')
        state = storage.load_snapshot(run, n=snapshot)['agent']
        net.load_state_dict(storage.expand(state)['network'])
        return net.cuda()

    # NOTE(review): other helpers in this file read runs.info(run)['params']['boardsize'];
    # here the key is read at the top level — confirm which layout this run uses.
    boardsize = runs.info(run)['boardsize']
    worlds = hex.Hex.initial(n_envs=1024, boardsize=boardsize)

    A = mcts.MCTSAgent(_snapshot_network(), **kwargs)
    B = mcts.DummyAgent(_snapshot_network())

    # Swap seats between the two rollouts so neither agent always moves first.
    forward = analysis.rollout(worlds, [A, B], n_reps=1, eval=False)
    reverse = analysis.rollout(worlds, [B, A], n_reps=1, eval=False)

    # flipud realigns the reversed seating so index 0 is always agent A.
    wins = count_wins(forward.transitions) + count_wins(reverse.transitions).flipud()
    rate = wins[0] / wins.sum()
    elo = torch.log(rate) - torch.log(1 - rate)

    kl = (kl_div(forward.decisions['0']) + kl_div(reverse.decisions['0'])) / 2
    ent = (rel_entropy(forward.decisions['0']) + rel_entropy(reverse.decisions['0'])) / 2

    return {'elo': elo.item(), 'kl': kl.item(), 'ent': ent.item()}
def snapshot_kl_divs(run):
    """Pairwise mean KL divergences between the policies of every snapshot
    of `run`, all evaluated on one shared batch of mixed initial worlds.

    Returns a square DataFrame indexed by snapshot id on both axes, where
    entry (i, j) is the mean over worlds of sum_a p_i(a) * (l_i(a) - l_j(a)),
    i.e. KL(p_i || p_j), with non-finite terms zeroed out.
    """
    import pandas as pd
    from pavlov import runs, storage
    from boardlaw import hex
    from boardlaw.main import mix
    import torch
    from tqdm.auto import tqdm

    m = storage.load_raw(run, 'model')
    worlds = mix(hex.Hex.initial(n_envs=16*1024, boardsize=runs.info(run)['params']['boardsize']))

    # Evaluate every snapshot's policy logits on the same batch of worlds.
    logits = {}
    for idx in tqdm(storage.snapshots(run)):
        sd = storage.load_snapshot(run, idx)['agent']
        m.load_state_dict(storage.expand(sd)['network'])
        logits[idx] = m(worlds).logits.detach()

    kldivs = {}
    for i, li in logits.items():
        # exp(l_i) is invariant in the inner loop; the original recomputed it
        # for every (i, j) pair.
        pi = li.exp()
        for j, lj in logits.items():
            # elementwise p_i * (l_i - l_j); summing over the action axis
            # gives KL(p_i || p_j) per world.
            terms = -pi.mul(lj - li)
            # Drop non-finite terms (presumably -inf logits from masked/illegal
            # moves — confirm against the network's masking) before summing.
            mask = torch.isfinite(terms)
            kldiv = terms.where(mask, torch.zeros_like(terms)).sum(-1)
            kldivs[i, j] = kldiv.mean().item()

    return pd.Series(kldivs).unstack()
def load(run):
    """Build a small batch of initial worlds plus an MCTS agent carrying the
    latest snapshot of `run`.

    Returns the (worlds, agent) pair.
    """
    # mohex is imported but unused here; kept in case the import itself matters.
    from boardlaw import mohex, hex, mcts

    boardsize = runs.info(run)['params']['boardsize']
    worlds = hex.Hex.initial(9, boardsize=boardsize)

    net = storage.load_raw(run, 'model')
    agent = mcts.MCTSAgent(net)

    latest = storage.load_latest(run)
    agent.load_state_dict(latest['agent'])

    return worlds, agent
def rollout_model(run=-1, mohex=True, eval=True, n_envs=1):
    """Roll out the raw model of `run`, either against MoHex or against itself.

    NOTE(review): the `mohex` and `eval` parameter names shadow the boardlaw
    module and the builtin respectively; kept unchanged for interface
    compatibility. Also note no snapshot state dict is loaded here — the agent
    uses whatever weights `load_raw` returns; confirm that is intended.
    """
    from boardlaw import mcts, hex

    # NOTE(review): other helpers read runs.info(run)['params']['boardsize'];
    # here the key is read at the top level — confirm which layout applies.
    boardsize = runs.info(run)['boardsize']
    worlds = hex.Hex.initial(n_envs=n_envs, boardsize=boardsize)

    net = storage.load_raw(run, 'model')
    agent = mcts.MCTSAgent(net, n_nodes=64)

    if mohex:
        # Rebinds the local name `mohex` from flag to module for this branch.
        from boardlaw import mohex
        pair = [agent, mohex.MoHexAgent(solver=True)]
    else:
        pair = [agent, agent]

    return rollout(worlds, pair, n_reps=1, eval=eval)
def generate_trained(run, n_envs=32 * 1024, device='cuda'):
    """Yield generated games from the latest trained agent of `run`.

    Builds a mixed batch of initial worlds, loads the raw model onto the GPU,
    restores the latest snapshot's agent state, and delegates to `generate`.
    """
    #TODO: Restore league and sched when you go back to large boards
    # NOTE(review): other helpers read runs.info(run)['params']['boardsize'];
    # here the key is read at the top level — confirm which layout applies.
    boardsize = runs.info(run)['boardsize']
    worlds = mix(Hex.initial(n_envs, boardsize=boardsize, device=device))

    network = storage.load_raw(run, 'model').cuda()
    agent = MCTSAgent(network)
    # The original loaded the latest snapshot twice and called load_state_dict
    # twice — the second time without the device argument. One device-aware
    # load is sufficient; the redundant reload is removed.
    agent.load_state_dict(storage.load_latest(run, device)['agent'])

    yield from generate(agent, worlds)
def load_field(*args, key=('boardsize', 'width', 'depth')):
    """Load a stats field from every run whose description starts with
    'main/', keyed by the hyperparameters named in `key`.

    Returns a DataFrame averaged (axis=1) over runs that share the same
    `key` tuple, with the tuple components as column MultiIndex levels.
    Runs that fail to load are logged and skipped.
    """
    rs = runs.pandas().loc[lambda df: df.description.fillna('').str.startswith('main/')].index
    head, tail = [], []
    for r in rs:
        try:
            # Build both pieces before appending: the original appended to
            # `tail` first, so a failure in literal_eval (or a missing key)
            # left head/tail different lengths and misaligned rows with keys.
            t = stats.pandas(r, *args)
            d = ast.literal_eval(runs.info(r)['_env']['JITTENS_PARAMS'])
            h = tuple(d[f] for f in key)
        except Exception as e:
            # Best-effort loading: skip the run but keep going.
            log.info(f'Failed to load {r}: {e}')
        else:
            tail.append(t)
            head.append(h)
    df = pd.DataFrame(tail, index=pd.MultiIndex.from_tuples(head)).T.sort_index(axis=1)
    df.columns.names = key
    # NOTE(review): `mean(axis=1, level=...)` is deprecated in modern pandas;
    # left as-is since the file's pandas version is unknown here.
    return df.mean(axis=1, level=[0, 1, 2])
def worlds(run, n_envs, device='cpu'):
    """Initial Hex worlds sized to `run`'s configured board.

    `n_envs` parallel environments on `device` (default CPU).
    """
    boardsize = runs.info(run)['params']['boardsize']
    # Pass boardsize/device by keyword, consistent with the other
    # Hex.initial call sites in this file.
    return Hex.initial(n_envs, boardsize=boardsize, device=device)