def selfplay_thread(): nonlocal net, temperature, num_games torch.set_num_threads(1) with torch.no_grad(): while True: if net.metadata["iteration"] == 1 and options.get( "fast_first_iteration"): game_options = { **options, "zero_value": True, # ignore value net "num_visits": 10, "detailed_visits_prob": 1.0, "kl_surprise_weights": False, } temp_fn = lambda mv: 1.0 else: game_options = options temp_fn = temperature game_states = play_game( net_evaluator=evaluate, game_class=game_class, temperature=temp_fn, **game_options, ) if game_states: trainer.add_sample.remote(game_states) num_games += 1
def selfplay_proc(cpu, game_class, options):
    """Run a fixed self-play workload and print its throughput.

    Creates a net with ``game_class.create_net(cuda=True, **options)``, prints
    its device, then plays ``num_rounds`` rounds of ``num_games_before_check``
    games each. After every round the number of generated samples and the
    elapsed time are printed.

    Args:
        cpu: Identifier shown in the progress message.
        game_class: Game class providing ``create_net``; also handed to
            ``play_game``.
        options: Keyword options forwarded to both ``create_net`` and
            ``play_game``.
    """
    num_rounds = 5
    num_games_before_check = 5

    def temperature(mv):  # selfplay param
        return 1.0 if mv < 4 else 0.1

    net = game_class.create_net(cuda=True, **options)
    print(net.device)
    with torch.no_grad():
        for _round in range(num_rounds):
            # perf_counter is monotonic, so the interval cannot be skewed by
            # system clock adjustments the way time.time() can.
            start = time.perf_counter()
            samples = 0
            for _game in range(num_games_before_check):
                game_states, _endstate = play_game(
                    net, game_class, temperature=temperature, **options
                )
                samples += len(game_states)
            elapsed = time.perf_counter() - start
            print(
                f"CPU {cpu} self-play generated {samples} samples in {elapsed:.1f}s"
            )
def selfplay_thread(tid): nonlocal games_played torch.set_num_threads(1) games_played = 0 with torch.no_grad(): while True: start = time.time() samples = 0 game_states, endstate = play_game( net_evaluator=evaluate, game_class=game_class, temperature=temperature, **options, ) samples += len(game_states) dt = time.time() - start # games_q.put((game_states, endstate)) games_played += 1 print( f"[{games_played}] CPU {cpu} thread {tid} self-play generated {samples} samples (out of {endstate['end_move']} moves) in {dt:.1f}s" )
def test_play():
    """Play one verbose GoMoku game with the net from ``GoMokuState.create_net()``."""
    game_class = GoMokuState
    play_game(game_class.create_net(), game_class, verbose=True)
# Script body: parse the command line, load the net for the selected game and
# play a single verbose game with it.
parser = argparse.ArgumentParser(description="Self-play visualization.")
parser.add_argument("--game", type=str, help="Game to play")
parser.add_argument("--tag", type=str, help="Tag for experiment", default="")
args = parser.parse_args()

# Short command-line names mapped to the game state class they select.
game_classes = {
    "cg": CaptureGoState,
    "pxcg": PixelCaptureGoState,
    "nim": NimState,
    "oth": OthelloState,
}
game = args.game
if game not in game_classes:
    # Also reached when --game is omitted (game is None); name the offending
    # value and the accepted ones instead of failing with an opaque message.
    raise ValueError(
        f"unknown game {game!r}; expected one of {sorted(game_classes)}"
    )
game_class = game_classes[game]

net = game_class.create_net(tag=args.tag)
options = {}
print(f"Loaded net {net.metadata['filename']} on cuda? {net.device}")


def temp_fn(mv):
    # Temperature 1.0 for move numbers below 2, then 0.1.
    return 1.0 if mv < 2 else 0.1


with torch.no_grad():
    game_states = play_game(
        net_evaluator=net.evaluate_sample,
        game_class=game_class,
        temperature=temp_fn,
        verbose=True,
    )
players.append(cls.create_net(net_ts=ts, tag=tag, cuda=False)) except Exception as e: print(e) print(len(players), "players loaded") options = {"num_visits": 1, "cpuct": 1.5, "force_win": True} options = {"num_visits": 1, "cpuct": 1.1} elocalc = BayesElo(players) for p1 in tqdm(players, ascii=True): for p2 in tqdm(players, ascii=True): if p1 is not p2: for _ in range(num_games): game_states, endstate = play_game([p1, p2], game_class, temperature=temp_fn, **options) result = endstate["value"][0] - endstate["value"][1] training_samples = play_game( net_evaluator=[p1.evaluate_sample, p2.evaluate_sample], game_class=game_class, temperature=temp_fn, **options ) v = training_samples[-1]["value"] result = v[0] - v[1] elocalc.add_result(p1, p2, result) df, aux = elocalc.summary_df() pd.set_option("display.max_rows", 500) df, aux = elocalc.summary_df(aux_zero=True)