def main():
    """Single-actor evaluation client.

    Loads one model, builds an Evaluator + ModelInterface for the one
    actor name, registers the evaluator's actor() as the game callback,
    then pumps GC.run() forever.
    """
    print('Python version:', sys.version)
    print('PyTorch version:', torch.__version__)
    print('CUDA version', torch.version.cuda)
    print('Conda env:', os.environ.get("CONDA_DEFAULT_ENV", ""))

    # Set game to online model.
    actors = ["actor"]

    # One Evaluator spec per actor; actor_name is bound as a lambda
    # default to avoid the late-binding closure pitfall.
    additional_to_load = {
        ("eval_" + actor_name): (
            Evaluator.get_option_spec(name="eval_" + actor_name),
            lambda object_map, actor_name=actor_name: Evaluator(
                object_map,
                name="eval_" + actor_name,
                actor_name=actor_name,
                stats=None))
        for i, actor_name in enumerate(actors)
    }
    # One ModelInterface per actor.
    additional_to_load.update({
        ("mi_" + name): (ModelInterface.get_option_spec(), ModelInterface)
        for name in actors
    })

    env = load_env(os.environ, num_models=1,
                   additional_to_load=additional_to_load)

    GC = env["game"].initialize()
    args = env["game"].options
    model = env["model_loaders"][0].load_model(GC.params)

    # for actor_name, stat, model_loader, e in \
    #         zip(actors, stats, env["model_loaders"], evaluators):
    for i in range(len(actors)):
        actor_name = actors[i]
        e = env["eval_" + actor_name]
        mi = env["mi_" + actor_name]
        # args.gpu >= 0 selects CUDA placement; negative means CPU.
        mi.add_model("actor", model, cuda=(args.gpu >= 0), gpu_id=args.gpu)
        print("register " + actor_name + " for e = " + str(e))
        e.setup(sampler=env["sampler"], mi=mi)

        def actor(batch, e):
            reply = e.actor(batch)
            return reply

        # Bind e as a default so each callback keeps its own evaluator.
        GC.reg_callback(actor_name, lambda batch, e=e: actor(batch, e))

    args = env["game"].options
    GC.start()
    for actor_name in actors:
        env["eval_" + actor_name].episode_start(0)
    # Runs until externally terminated; GC.stop() is unreachable unless
    # GC.run() raises.
    while True:
        GC.run()
    GC.stop()
def main_loop(self): evaluator = Evaluator(stats=False) # Set game to online model. env, args = load_env(os.environ, evaluator=evaluator, overrides=dict( num_games=1, batchsize=1, num_games_per_thread=1, greedy=True, T=1, additional_labels="aug_code,move_idx")) GC = env["game"].initialize() model = env["model_loaders"][0].load_model(GC.params) mi = ModelInterface() mi.add_model("model", model) mi.add_model("actor", model, copy=True, cuda=args.gpu is not None, gpu_id=args.gpu) mi["model"].eval() mi["actor"].eval() self.evaluator = evaluator self.last_move_idx = None def human_actor(batch): print("In human_actor") return self.prompt("DF> ", batch) def actor(batch): return self.actor(batch) def train(batch): self.prompt("DF Train> ", batch) evaluator.setup(sampler=env["sampler"], mi=mi) GC.reg_callback_if_exists("actor", actor) GC.reg_callback_if_exists("human_actor", human_actor) GC.reg_callback_if_exists("train", train) GC.Start() evaluator.episode_start(0) while True: GC.Run() if self.exit: break GC.Stop()
def main():
    """Training entry point: wire a Trainer and SingleProcessRun to the game.

    Registers the "train" callback, and an "actor" callback only if the
    game declares one, then hands control to the runner.
    """
    print(sys.version)
    print(torch.__version__)
    print(torch.version.cuda)
    print("Conda env: \"%s\"" % os.environ.get("CONDA_DEFAULT_ENV", ""))

    additional_to_load = {
        'trainer': (
            Trainer.get_option_spec(),
            lambda option_map: Trainer(option_map)),
        'runner': (
            SingleProcessRun.get_option_spec(),
            lambda option_map: SingleProcessRun(option_map)),
    }

    env = load_env(os.environ, additional_to_load=additional_to_load)

    trainer = env['trainer']
    runner = env['runner']

    GC = env["game"].initialize()

    model_loader = env["model_loaders"][0]
    model = model_loader.load_model(GC.params)
    # opt=True — presumably attaches optimizer state to the training
    # model; confirm against ModelInterface.add_model.
    env["mi"].add_model("model", model, opt=True)

    GC.reg_callback("train", trainer.train)
    if GC.reg_has_callback("actor"):
        args = env["game"].options
        # Actor gets a copy so inference doesn't race with training
        # updates; args.gpu >= 0 selects CUDA placement.
        env["mi"].add_model(
            "actor", model,
            copy=True, cuda=(args.gpu >= 0), gpu_id=args.gpu)
        GC.reg_callback("actor", trainer.actor)

    trainer.setup(
        sampler=env["sampler"], mi=env["mi"], rl_method=env["method"])

    runner.setup(GC, episode_summary=trainer.episode_summary,
                 episode_start=trainer.episode_start)

    runner.run()
def main_loop(self): evaluator = Evaluator(stats=False) # Set game to online model. env, args = load_env(os.environ, evaluator=evaluator, overrides=dict(num_games=1, batchsize=1, num_games_per_thread=1, greedy=True, T=1, additional_labels="aug_code,move_idx")) GC = env["game"].initialize() model = env["model_loaders"][0].load_model(GC.params) mi = ModelInterface() mi.add_model("model", model) mi.add_model("actor", model, copy=True, cuda=args.gpu is not None, gpu_id=args.gpu) mi["model"].eval() mi["actor"].eval() self.evaluator = evaluator self.last_move_idx = None def human_actor(batch): print("In human_actor") return self.prompt("DF> ", batch) def actor(batch): return self.actor(batch) def train(batch): self.prompt("DF Train> ", batch) evaluator.setup(sampler=env["sampler"], mi=mi) GC.reg_callback_if_exists("actor", actor) GC.reg_callback_if_exists("human_actor", human_actor) GC.reg_callback_if_exists("train", train) GC.Start() evaluator.episode_start(0) while True: GC.Run() if self.exit: break GC.Stop()
#!/usr/bin/env python # -*- coding: utf-8 -*- import argparse from datetime import datetime import sys import os from rlpytorch import LSTMTrainer, Sampler, SingleProcessRun, load_env, ModelLoader, ArgsProvider, ModelInterface if __name__ == '__main__': trainer = LSTMTrainer() runner = SingleProcessRun() env, all_args = load_env(os.environ, trainer=trainer, runner=runner) GC = env["game"].initialize() model = env["model_loaders"][0].load_model(GC.params) mi = ModelInterface() mi.add_model("model", model, optim_params={"lr": 0.001}) mi.add_model("actor", model, copy=True, cuda=all_args.gpu is not None, gpu_id=all_args.gpu) trainer.setup(sampler=env["sampler"], mi=mi, rl_method=env["method"]) GC.reg_callback("train", trainer.train)
def main():
    """GTP console entry point: drive GoConsoleGTP over an online model."""
    print('Python version:', sys.version)
    print('PyTorch version:', torch.__version__)
    print('CUDA version', torch.version.cuda)
    print('Conda env:', os.environ.get("CONDA_DEFAULT_ENV", ""))

    additional_to_load = {
        'evaluator': (
            Evaluator.get_option_spec(),
            lambda object_map: Evaluator(object_map, stats=None)),
        'console': (
            GoConsoleGTP.get_option_spec(),
            lambda object_map: GoConsoleGTP(object_map))
    }

    # Set game to online model.
    env = load_env(
        os.environ,
        overrides=dict(
            additional_labels=['aug_code', 'move_idx'],
        ),
        additional_to_load=additional_to_load)

    evaluator = env['evaluator']

    GC = env["game"].initialize()
    console = env["console"]

    model_loader = env["model_loaders"][0]
    model = model_loader.load_model(GC.params)
    gpu = model_loader.options.gpu
    # NOTE(review): use_gpu is computed but not used below (the GPU
    # add_model call is commented out) — confirm whether it can go.
    use_gpu = gpu is not None and gpu >= 0

    mi = env['mi']
    mi.add_model("model", model)
    # mi.add_model(
    #     "actor", model,
    #     copy=True, cuda=use_gpu, gpu_id=gpu)
    mi.add_model("actor", model)
    mi["model"].eval()
    mi["actor"].eval()

    console.setup(GC, evaluator)

    def human_actor(batch):
        # py = psutil.Process(pid)
        # memoryUse = py.memory_info()[0]/2.**30  # memory use in GB...I think
        # print('memory use:', memoryUse)
        return console.prompt("", batch)

    def actor(batch):
        return console.actor(batch)

    def train(batch):
        console.prompt("DF Train> ", batch)

    evaluator.setup(sampler=env["sampler"], mi=mi)

    GC.reg_callback_if_exists("actor_black", actor)
    GC.reg_callback_if_exists("human_actor", human_actor)
    GC.reg_callback_if_exists("train", train)

    GC.start()
    # TODO: For now fixed resign threshold to be 0.05. Will add a switch
    GC.game_obj.setRequest(mi["actor"].step, -1, 0.05, -1)

    evaluator.episode_start(0)
    while True:
        GC.run()
        if console.exit:
            break
    GC.stop()
#!/usr/bin/env python # -*- coding: utf-8 -*- import argparse from datetime import datetime import sys import os from rlpytorch import LSTMTrainer, Sampler, EvalIters, load_env, ModelLoader, ArgsProvider, ModelInterface if __name__ == '__main__': trainer = LSTMTrainer() eval_iters = EvalIters() env, all_args = load_env(os.environ, overrides=dict(actor_only=True), trainer=trainer, eval_iters=eval_iters) GC = env["game"].initialize() model = env["model_loaders"][0].load_model(GC.params) mi = ModelInterface() mi.add_model("model", model) mi.add_model("actor", model, copy=True, cuda=all_args.gpu is not None, gpu_id=all_args.gpu) trainer.setup(sampler=env["sampler"], mi=env["mi"])
def main():
    """Two-actor selfplay/eval client (black and white evaluators).

    Reloads the model versions the server announces at game start,
    logs win-rate stats at game end, and self-terminates after
    --suicide_after_n_games games when that option is positive.
    """
    print('Python version:', sys.version)
    print('PyTorch version:', torch.__version__)
    print('CUDA version', torch.version.cuda)
    print('Conda env:', os.environ.get("CONDA_DEFAULT_ENV", ""))

    # Set game to online model.
    actors = ["actor_black", "actor_white"]

    # One Evaluator spec per actor; actor_name bound as a lambda default
    # to avoid the late-binding closure pitfall.
    additional_to_load = {
        ("eval_" + actor_name): (
            Evaluator.get_option_spec(name="eval_" + actor_name),
            lambda object_map, actor_name=actor_name: Evaluator(
                object_map,
                name="eval_" + actor_name,
                actor_name=actor_name,
                stats=None))
        for i, actor_name in enumerate(actors)
    }
    additional_to_load.update({
        ("mi_" + name): (ModelInterface.get_option_spec(), ModelInterface)
        for name in actors
    })

    env = load_env(
        os.environ,
        num_models=2,
        overrides={'actor_only': True},
        additional_to_load=additional_to_load)

    GC = env["game"].initialize()

    stats = [Stats(), Stats()]

    for i in range(len(actors)):
        actor_name = actors[i]
        stat = stats[i]
        e = env["eval_" + actor_name]
        print(f'register {actor_name} for e = {e!s}')
        e.setup(sampler=env["sampler"], mi=env["mi_" + actor_name])

        def actor(batch, e, stat):
            reply = e.actor(batch)
            stat.feed(batch)
            return reply

        # Bind e/stat as defaults so each callback keeps its own pair.
        GC.reg_callback(
            actor_name,
            lambda batch, e=e, stat=stat: actor(batch, e, stat))

    root = os.environ.get("root", "./")
    print(f'Root: "{root}"')

    args = env["game"].options
    loop_end = False

    def game_start(batch):
        print("In game start")
        vers = [int(batch["black_ver"][0]), int(batch["white_ver"][0])]
        # Use the version number to load models.
        for model_loader, ver, actor_name in zip(
                env["model_loaders"], vers, actors):
            if ver >= 0:
                # Retry until the model file becomes available.
                while True:
                    try:
                        reload(
                            env["mi_" + actor_name], model_loader,
                            GC.params, args, root, ver, actor_name)
                        break
                    except BaseException:
                        import traceback
                        traceback.print_exc()
                        time.sleep(10)

    def game_end(batch):
        nonlocal loop_end
        wr = batch.GC.getClient().getGameStats().getWinRateStats()
        win_rate = (100.0 * wr.black_wins / wr.total_games
                    if wr.total_games > 0 else 0.0)
        # NOTE(review): the two adjacent f-strings concatenate with no
        # separator, printing ".Black winrate" — likely a missing space;
        # confirm intended output before changing the literal.
        print(f'{datetime.now()!s} B/W: {wr.black_wins}/{wr.white_wins}.'
              f'Black winrate: {win_rate:.2f} ({wr.total_games})')
        if args.suicide_after_n_games > 0 and \
                wr.total_games >= args.suicide_after_n_games:
            print(f'#suicide_after_n_games: {args.suicide_after_n_games}, '
                  f'total_games: {wr.total_games}')
            loop_end = True

    GC.reg_callback_if_exists("game_start", game_start)
    GC.reg_callback_if_exists("game_end", game_end)

    GC.start()

    if args.eval_model_pair:
        if args.eval_model_pair.find(",") >= 0:
            black, white = args.eval_model_pair.split(",")
        else:
            black = extract_ver(env["model_loaders"][0])
            white = extract_ver(env["model_loaders"][1])
            # Force them to reload in the future.
            for model_loader, actor_name in zip(env["model_loaders"], actors):
                reload_model(model_loader, GC.params,
                             env["mi_" + actor_name], actor_name, args)

        # We just use one thread to do selfplay.
        GC.GC.getClient().setRequest(
            int(black), int(white), env['game'].options.resign_thres, 1)

    for actor_name in actors:
        env["eval_" + actor_name].episode_start(0)

    while not loop_end:
        GC.run()
    GC.stop()
#!/usr/bin/env python # -*- coding: utf-8 -*- from datetime import datetime import sys import os from rlpytorch import load_env, Evaluator, ArgsProvider, EvalIters if __name__ == '__main__': evaluator = Evaluator(stats=False) eval_iters = EvalIters() env, args = load_env(os.environ, overrides=dict(actor_only=True), evaluator=evaluator, eval_iters=eval_iters) GC = env["game"].initialize() model = env["model_loaders"][0].load_model(GC.params) env["mi"].add_model("actor", model, cuda=not args.gpu is None, gpu_id=args.gpu) env["mi"]["actor"].eval() def actor(batch): reply = evaluator.actor(batch) ''' s = batch["s"][0][0]
import os from rlpytorch import Evaluator, load_env from console_lib import GoConsoleGTP if __name__ == '__main__': additional_to_load = { 'evaluator': (Evaluator.get_option_spec(), lambda object_map: Evaluator(object_map, stats=None)), } # Set game to online model. env = load_env(os.environ, overrides=dict( num_games=1, greedy=True, T=1, model="online", additional_labels=['aug_code', 'move_idx'], ), additional_to_load=additional_to_load) evaluator = env['evaluator'] GC = env["game"].initialize() model_loader = env["model_loaders"][0] model = model_loader.load_model(GC.params) gpu = model_loader.options.gpu use_gpu = gpu is not None and gpu >= 0
# Check whether the actions remains the same. if t < T - 1: key = (id, sel["seq"][t][i], sel["game_counter"][t][i]) recorded_a = self.idgseq2action[key] actual_a = sel["a"][t][i] if recorded_a != actual_a: self._debug( "%s Action was different. recorded %d, actual %d" % (prompt, recorded_a, actual_a)) # Overlapped by 1. self.id2seqs_train[id] = last_seq - 1 if __name__ == '__main__': collector = StatsCollector() runner = SingleProcessRun() env, all_args = load_env(os.environ, collector=collector, runner=runner) GC = env["game"].initialize() # GC.setup_gpu(0) collector.set_params(GC.params) GC.reg_callback("actor", collector.actor) GC.reg_callback("train", collector.train) GC.reg_sig_int() runner.setup(GC) runner.run()
def main():
    """Two-actor selfplay/eval client (older %-format variant).

    Same flow as the f-string variant in this file, but uses a
    module-level ``loop_end`` global and the older ``GC.GC.setRequest``
    / ``batch.GC.getGameStats`` API surface.
    """
    # Set game to online model.
    actors = ["actor_black", "actor_white"]

    # One Evaluator spec per actor; actor_name bound as a lambda default
    # to avoid the late-binding closure pitfall.
    additional_to_load = {
        ("eval_" + actor_name): (
            Evaluator.get_option_spec(name="eval_" + actor_name),
            lambda object_map, actor_name=actor_name: Evaluator(
                object_map,
                name="eval_" + actor_name,
                actor_name=actor_name,
                stats=None))
        for i, actor_name in enumerate(actors)
    }
    additional_to_load.update({
        ("mi_" + name): (ModelInterface.get_option_spec(), ModelInterface)
        for name in actors
    })

    env = load_env(
        os.environ,
        num_models=2,
        overrides=dict(actor_only=True),
        additional_to_load=additional_to_load)

    GC = env["game"].initialize()

    stats = [Stats(), Stats()]

    # for actor_name, stat, model_loader, e in \
    #         zip(actors, stats, env["model_loaders"], evaluators):
    for i in range(len(actors)):
        actor_name = actors[i]
        stat = stats[i]
        e = env["eval_" + actor_name]
        print("register " + actor_name + " for e = " + str(e))
        e.setup(sampler=env["sampler"], mi=env["mi_" + actor_name])

        def actor(batch, e, stat):
            reply = e.actor(batch)
            stat.feed(batch)
            # eval_iters.stats.feed_batch(batch)
            return reply

        # Bind e/stat as defaults so each callback keeps its own pair.
        GC.reg_callback(
            actor_name,
            lambda batch, e=e, stat=stat: actor(batch, e, stat))

    root = os.environ.get("root", "./")
    print("Root: \"%s\"" % root)

    args = env["game"].options

    # NOTE(review): module-level global used as the loop flag; the
    # sibling variant uses a nonlocal instead.
    global loop_end
    loop_end = False

    def game_start(batch):
        print("In game start")
        vers = [int(batch["black_ver"][0]), int(batch["white_ver"][0])]
        # Use the version number to load models.
        for model_loader, ver, actor_name in zip(
                env["model_loaders"], vers, actors):
            if ver >= 0:
                # Retry until the model file becomes available.
                while True:
                    try:
                        reload(env["mi_" + actor_name], model_loader,
                               GC.params, args, root, ver, actor_name)
                        break
                    except BaseException:
                        import traceback
                        traceback.print_exc()
                        time.sleep(10)

    def game_end(batch):
        global loop_end
        # print("In game end")
        wr = batch.GC.getGameStats().getWinRateStats()
        win_rate = 100.0 * wr.black_wins / wr.total_games \
            if wr.total_games > 0 else 0.0
        print("%s B/W: %d/%d. Black winrate: %.2f (%d)" %
              (str(datetime.now()), wr.black_wins, wr.white_wins,
               win_rate, wr.total_games))
        if args.suicide_after_n_games > 0 and \
                wr.total_games >= args.suicide_after_n_games:
            print("#suicide_after_n_games: %d, total_games: %d" %
                  (args.suicide_after_n_games, wr.total_games))
            loop_end = True

    GC.reg_callback_if_exists("game_start", game_start)
    GC.reg_callback_if_exists("game_end", game_end)

    # def episode_start(i):
    #     global GC
    #     GC.GC.setSelfplayCount(10000)
    #     evaluator.episode_start(i)

    GC.start()

    if args.eval_model_pair:
        if args.eval_model_pair.find(",") >= 0:
            black, white = args.eval_model_pair.split(",")
        else:
            black = extract_ver(env["model_loaders"][0])
            white = extract_ver(env["model_loaders"][1])
            # Force them to reload in the future.
            for model_loader, actor_name in zip(env["model_loaders"], actors):
                reload_model(model_loader, GC.params,
                             env["mi_" + actor_name], actor_name, args)

        # We just use one thread to do selfplay.
        GC.GC.setRequest(int(black), int(white),
                         env['game'].options.resign_thres, 1)

    for actor_name in actors:
        env["eval_" + actor_name].episode_start(0)

    while not loop_end:
        GC.run()
    GC.stop()
'gpu': 0, 'num_block': 20, 'dim': 224, 'mcts_puct': 1.50, 'batchsize': 16, 'mcts_rollout_per_batch': 16, 'mcts_threads': 2, 'mcts_rollout_per_thread': 64, # bigger value will spend more time to genmove 'resign_thres': 0.05, 'mcts_virtual_loss': 1, } # Set game to online model. env = load_env(os.environ, overrides=overrides, additional_to_load=additional_to_load) GC_PARAMS = { 'ACTION_CLEAR': -97, 'ACTION_PASS': -99, 'ACTION_RESIGN': -98, 'ACTION_SKIP': -100, 'board_size': 19, 'num_action': 362, 'num_future_actions': 1, 'num_planes': 18, 'opponent_stone_plane': 1, 'our_stone_plane': 0, 'num_group': 2, 'T': 1
# LICENSE file in the root directory of this source tree. #!/usr/bin/env python # -*- coding: utf-8 -*- from datetime import datetime import sys import os from rlpytorch import load_env, Evaluator, ArgsProvider, EvalIters if __name__ == '__main__': evaluator = Evaluator(stats=False) eval_iters = EvalIters() env, args = load_env(os.environ, overrides=dict(actor_only=True), eval_iters=eval_iters, evaluator=evaluator) GC = env["game"].initialize_reduced_service() model = env["model_loaders"][0].load_model(GC.params) mi = env["mi"] mi.add_model("actor", model, cuda=args.gpu is not None, gpu_id=args.gpu) def reduced_project(batch): output = mi["actor"].forward(batch.hist(0)) eval_iters.stats.feed_batch(batch) return dict(reduced_s=output["h"].data) def reduced_forward(batch): b = batch.hist(0) output = mi["actor"].transition(b["reduced_s"], b["a"])
import re import time from rlpytorch import load_env, SingleProcessRun, Trainer matcher = re.compile(r"save-(\d+).bin") if __name__ == '__main__': additional_to_load = { 'trainer': (Trainer.get_option_spec(), lambda option_map: Trainer(option_map)), 'runner': (SingleProcessRun.get_option_spec(), lambda option_map: SingleProcessRun(option_map)), } env = load_env(os.environ, additional_to_load=additional_to_load) trainer = env['trainer'] runner = env['runner'] GC = env["game"].initialize() model_loader = env["model_loaders"][0] model = model_loader.load_model(GC.params) env["mi"].add_model("model", model, opt=True) keep_prev_selfplay = env["game"].options.keep_prev_selfplay model_ver = 0 model_filename = model_loader.options.load if isinstance(model_filename, str) and model_filename != "": realpath = os.path.realpath(model_filename)
#!/usr/bin/env python # -*- coding: utf-8 -*- import argparse from datetime import datetime import sys import os from rlpytorch import LSTMTrainer, Sampler, EvalIters, load_env, ModelLoader, ArgsProvider, ModelInterface if __name__ == '__main__': trainer = LSTMTrainer() eval_iters = EvalIters() env, all_args = load_env(os.environ, overrides=dict(actor_only=True), trainer=trainer, eval_iters=eval_iters) GC = env["game"].initialize() model = env["model_loaders"][0].load_model(GC.params) mi = ModelInterface() mi.add_model("model", model) mi.add_model("actor", model, copy=True, cuda=all_args.gpu is not None, gpu_id=all_args.gpu) trainer.setup(sampler=env["sampler"], mi=env["mi"]) def actor(batch): reply = trainer.actor(batch) eval_iters.stats.feed_batch(batch) return reply
def main():
    """gRPC-bridged Go console: relay moves between a game server and the AI.

    Connects a TurnStub to the game server (port 50051), loads the
    online model, and registers callbacks that shuttle moves between
    the remote human player and the local actor.
    """
    address = addrs['game_server']
    if address != "":
        channel = grpc.insecure_channel(address + ':50051')
    else:
        channel = grpc.insecure_channel("localhost:50051")
    stub = play_pb2_grpc.TurnStub(channel)

    print('Python version:', sys.version)
    print('PyTorch version:', torch.__version__)
    print('CUDA version', torch.version.cuda)
    print('Conda env:', os.environ.get("CONDA_DEFAULT_ENV", ""))

    additional_to_load = {
        'evaluator': (
            Evaluator.get_option_spec(),
            lambda object_map: Evaluator(object_map, stats=None)),
    }

    # Set game to online model.
    env = load_env(
        os.environ,
        overrides={
            'num_games': 1,
            'greedy': True,
            'T': 1,
            'model': 'online',
            'additional_labels': ['aug_code', 'move_idx'],
        },
        additional_to_load=additional_to_load)

    evaluator = env['evaluator']

    GC = env["game"].initialize()

    model_loader = env["model_loaders"][0]
    model = model_loader.load_model(GC.params)

    mi = env['mi']
    mi.add_model("model", model)
    mi.add_model("actor", model)
    mi["model"].eval()
    mi["actor"].eval()

    console = GoConsoleGTP(GC, evaluator)

    # TODO: create an instance of game when the client sends a request
    # print("\n\n\nCheck connect\n\n\n")
    # ID = stub.NewRoom(play_pb2.State(status = True)).ID
    # print("Current AI's ID is ", ID)
    # res_arr = stub.GetResumed(play_pb2.State(status = True, ID = ID)).move
    # console.res_len = len(res_arr)
    # # console.res_ind = 3
    # # arr = ["BKD", "WFB", "BGA"]
    # if console.res_len > 0 and res_arr[-1][0].upper() == "B":
    #     _ = stub.UpdateNext(play_pb2.State(status = True, ID = ID))

    # def check_end_game(m):
    #     if m.quit:
    #         GC.stop()
    #     return m

    def reset():
        # Open a new room and (re)initialize console-side game state.
        ID = stub.NewRoom(play_pb2.State(status=True)).ID
        console.ID = ID
        console.color = {'has_chosen': False, "client": 1, "AI": 2}
        console.prev_player = 0
        print("Current AI's ID is ", console.ID)
        if not console.color["has_chosen"]:
            # Busy-wait until the human has picked a color server-side.
            while not stub.HasChosen(
                    play_pb2.State(status=True, ID=ID)).status:
                pass
            # AI_color = stub.GetAIPlayer(play_pb2.State(status = True)).color
            # human_color = AI_color % 2 + 1
            console.color["AI"] = stub.GetAIPlayer(
                play_pb2.State(status=True, ID=ID)).color
            console.color["client"] = console.color["AI"] % 2 + 1
            console.color["has_chosen"] = True
        console.res_arr = stub.GetResumed(
            play_pb2.State(status=True, ID=ID)).move
        console.res_len = len(console.res_arr)
        if console.res_len > 0 and console.res_arr[-1][0].upper() == "B":
            _ = stub.UpdateNext(play_pb2.State(status=True, ID=ID))

    reset()

    def check_reset(reply):
        # Returns (True, reply-with-clear-action) when the server
        # signalled an exit/restart; (False, reply) otherwise.
        console.reset = stub.CheckExit(
            play_pb2.State(status=True, ID=console.ID)).status
        if console.reset:
            print("\n\n\nRestarting game...\n\n\n")
            reset()
            console.reset = False
            reply["a"] = console.actions["clear"]
            return True, reply
        return False, reply

    def human_actor(batch):
        # print("\n\n\nCheck human_actor\n\n\n")
        reply = dict(pi=None, a=None, V=0)
        ID = console.ID
        # console.reset = stub.CheckExit(play_pb2.State(status = True, ID = ID)).status
        # if console.reset:
        #     print("\n\n\nRestarting game...\n\n\n")
        #     reset()
        #     console.reset = False
        #     reply["a"] = console.actions["clear"]
        #     return reply
        AI_color = console.color["AI"]
        human_color = console.color["client"]
        # is_resumed = stub.IsResumed(play_pb2.State(status = True)).status
        if console.res_len > 0:
            # Replay the resumed-game moves one per call.
            # print("\n\n\nCheck is_resumed = true\n\n\n")
            # print("\n\n\n", arr[-console.res_ind], "\n\n\n")
            reply["a"] = console.str2action(console.res_arr[-console.res_len])
            console.res_len -= 1
            return reply
        # print("\n\n\nCheck is_resumed = false\n\n\n")
        while True:
            if console.prev_player == 1:
                # AI moved last: push its move to the server and advance
                # the turn.
                move = console.get_last_move(batch)
                x, y = move2xy(move)
                _ = stub.SetMove(
                    play_pb2.Step(
                        x=x, y=y,
                        player=play_pb2.Player(color=AI_color, ID=ID)))
                _ = stub.UpdateNext(play_pb2.State(status=True, ID=ID))
            if stub.IsNextPlayer(
                    play_pb2.Player(color=AI_color, ID=ID)).status:
                reply["a"] = console.actions["skip"]
                console.prev_player = 1
                return reply
            # else:
            # Busy-wait for the human's turn, checking for restarts.
            while stub.IsNextPlayer(
                    play_pb2.Player(color=human_color, ID=ID)).status:
                do_reset, reply = check_reset(reply)
                if do_reset:
                    return reply
                pass
            human_xy = stub.GetMove(play_pb2.Player(color=human_color, ID=ID))
            reply["a"] = console.move2action(xy2move(human_xy.x, human_xy.y))
            console.prev_player = 2
            return reply

    def actor(batch):
        return console.actor(batch)

    def train(batch):
        console.prompt("DF Train> ", batch)

    evaluator.setup(sampler=env["sampler"], mi=mi)

    GC.reg_callback_if_exists("actor_black", actor)
    GC.reg_callback_if_exists("human_actor", human_actor)
    GC.reg_callback_if_exists("train", train)

    GC.start()
    GC.GC.getClient().setRequest(
        mi["actor"].step, -1, env['game'].options.resign_thres, -1)

    evaluator.episode_start(0)
    while True:
        GC.run()
        if console.exit:
            break
    GC.stop()
def main():
    """American-checkers two-actor selfplay client (heavily annotated)."""
    print('Python version:', sys.version)
    print('PyTorch version:', torch.__version__)
    print('CUDA version', torch.version.cuda)
    print('Conda env:', os.environ.get("CONDA_DEFAULT_ENV", ""))

    # Register player names
    actors = ["actor_white", "actor_black"]

    """
    Class Evaluator is a pure python class,
    which run neural network in eval mode
    and get return results and update some stat info.
    Will creates 'eval_actor_white', 'eval_actor_black'.
    """
    additional_to_load = {
        ("eval_" + actor_name): (
            Evaluator.get_option_spec(name="eval_" + actor_name),
            lambda object_map, actor_name=actor_name: Evaluator(
                object_map,
                name="eval_" + actor_name,
                actor_name=actor_name,
                stats=None))
        for i, actor_name in enumerate(actors)
    }

    """
    class ModelInterface is a python class saving network models.
    Its member models is a key-value store to call a CNN model by name.
    Will creates 'mi_actor_white', 'mi_actor_black'.
    """
    additional_to_load.update({
        ("mi_" + name): (ModelInterface.get_option_spec(), ModelInterface)
        for name in actors
    })

    """
    load_env:
        game - load file game elfgames.american_checkers.game
        method - load "method" passed via params:
            file model_american_checkers.py
            return array with [model, method]
            model_file=elfgames.american_checkers.model_american_checkers
            model=df_pred
        model_loaders - prepare to load(returns instance of class ModelLoader)
            "model" passed via params:
            file model_american_checkers.py
            return array with [model, method]
            model_file=elfgames.american_checkers.model_american_checkers
            model=df_pred
        sampler - Used to sample an action from policy.
        mi - class ModelInterface is a python class saving network models.
            Its member models is a key-value store to call a CNN model
            by name.
        eval_* - run neural network in eval mode and get return results
            and update some stat info.
    """
    env = load_env(
        os.environ,
        num_models=2,
        overrides={'actor_only': True},
        additional_to_load=additional_to_load)

    """
    Initializes keys('game_end', 'game_start', 'actor_white',
    'actor_black') for communication Python and C++ code, defined in
    Game.py and GameFeature.h. Also, initializes GameContext from C++
    library wrapped by GC from python side + sets mode that parsed from
    options like play/selfplay/train/offline_train.
    """
    GC = env["game"].initialize()

    """
    Registering the methods in the GameContext on the python side.
    We registered their names earlier when the game was
    initialized(names were registered on the python and C++ sides).
    Now its a registration of methods that will be called when we try
    to pass batch on eval from C++ to Python.
    Example:
        We register "human_actor" as key and register the same method
        on the python side. When AIClientT calls method act(it takes 2
        parameters: state, and key) act connect to python and transmits
        the state by key("human_actor", "actor_black") to these
        methods(actor() func defined below).
    """
    # Some statistic about batch usage, also we can add more info about
    # games stats.
    stats = [Stats(), Stats()]

    for i in range(len(actors)):
        actor_name = actors[i]
        stat = stats[i]
        evaluator = env["eval_" + actor_name]
        evaluator.setup(sampler=env["sampler"], mi=env["mi_" + actor_name])

        def actor(batch, evaluator, stat):
            reply = evaluator.actor(batch)
            stat.feed(batch)
            return reply

        # To expand the functionality we use lambda
        GC.reg_callback(
            actor_name,
            lambda batch, evaluator=evaluator, stat=stat: actor(
                batch, evaluator, stat))

    # Get the directory containing the models.
    root = os.environ.get("root", "./")
    args = env["game"].options
    # Stops client after N games, defined in --suicide_after_n_games
    # param.
    loop_end = False

    """
    This method is responsible for updating the model to the current
    one(received from the server) after starting.
    Called by 'game_start' key from C++ side.
    """
    def game_start(batch):
        info = "game_start() load/reload models\n"
        logger.info(info)
        vers = [int(batch["white_ver"][0]), int(batch["black_ver"][0])]

        # Use the version number to load models.
        for model_loader, ver, actor_name in zip(
                env["model_loaders"], vers, actors):
            if ver >= 0:
                # Retry until the model file becomes available.
                while True:
                    try:
                        reload(
                            env["mi_" + actor_name], model_loader,
                            GC.params, args, root, ver, actor_name)
                        break
                    except BaseException:
                        import traceback
                        traceback.print_exc()
                        time.sleep(10)

    """
    This method is responsible for displaying game statistics, as well
    as stopping the client after N games(loop_end).
    Called by 'game_end' key from C++ side.
    """
    def game_end(batch):
        nonlocal loop_end
        wr = batch.GC.getClient().getGameStats().getWinRateStats()
        win_rate = (100.0 * wr.black_wins / (wr.black_wins + wr.white_wins)
                    if (wr.black_wins + wr.white_wins) > 0 else 0.0)
        info = f'game_end()\tB/W: {wr.black_wins}/{wr.white_wins}, '
        info += f'Draw: {wr.both_lost}, '
        info += f'Black winrate: {win_rate:.2f}, '
        info += f'Total Games: {wr.total_games}'
        logger.info(info)
        if args.suicide_after_n_games > 0 and \
                wr.total_games >= args.suicide_after_n_games:
            info = f'game_end()\tTotal Games: {wr.total_games}, '
            info += f'#suicide_after_n_games: {args.suicide_after_n_games}'
            logger.info(info)
            loop_end = True

    # Registering the methods described above in Python's GameContext.
    GC.reg_callback_if_exists("game_start", game_start)
    GC.reg_callback_if_exists("game_end", game_end)

    GC.start()

    """
    Upon receiving the --eval_model_pair parameter, we load 2 models
    from a file and pass models versions to C++ side for evaluation.
    """
    if args.eval_model_pair:
        if args.eval_model_pair.find(",") >= 0:
            black, white = args.eval_model_pair.split(",")
        else:
            black = extract_ver(env["model_loaders"][0])
            white = extract_ver(env["model_loaders"][1])
            # Force them to reload in the future.
            for model_loader, actor_name in zip(env["model_loaders"], actors):
                reload_model(model_loader, GC.params,
                             env["mi_" + actor_name], actor_name, args)

        # We just use one thread to do selfplay.
        GC.GC.getClient().setRequest(int(black), int(white), 1)

    # Called before each episode, resets actor_count(num of total nn
    # call)
    for actor_name in actors:
        env["eval_" + actor_name].episode_start(0)

    while not loop_end:
        GC.run()
    GC.stop()
if sel["seq"][t][i] != last_seq + 1: self._debug("%s. Invalid next seq. seq should be %d" % (prompt, last_seq + 1)) last_seq += 1 # Check whether the actions remains the same. if t < T - 1: key = (id, sel["seq"][t][i], sel["game_counter"][t][i]) recorded_a = self.idgseq2action[key] actual_a = sel["a"][t][i] if recorded_a != actual_a: self._debug("%s Action was different. recorded %d, actual %d" % (prompt, recorded_a, actual_a)) # Overlapped by 1. self.id2seqs_train[id] = last_seq - 1 if __name__ == '__main__': collector = StatsCollector() runner = SingleProcessRun() env, all_args = load_env(os.environ, collector=collector, runner=runner) GC = env["game"].initialize() # GC.setup_gpu(0) collector.set_params(GC.params) GC.reg_callback("actor", collector.actor) GC.reg_callback("train", collector.train) GC.reg_sig_int() runner.setup(GC) runner.run()
from rlpytorch import load_env, SingleProcessRun, Trainer matcher = re.compile(r"save-(\d+).bin") if __name__ == '__main__': additional_to_load = { 'trainer': ( Trainer.get_option_spec(), lambda option_map: Trainer(option_map)), 'runner': ( SingleProcessRun.get_option_spec(), lambda option_map: SingleProcessRun(option_map)), } env = load_env(os.environ, additional_to_load=additional_to_load) trainer = env['trainer'] runner = env['runner'] GC = env["game"].initialize() model_loader = env["model_loaders"][0] model = model_loader.load_model(GC.params) env["mi"].add_model("model", model, opt=True) keep_prev_selfplay = env["game"].options.keep_prev_selfplay model_ver = 0 model_filename = model_loader.options.load if isinstance(model_filename, str) and model_filename != "": realpath = os.path.realpath(model_filename)
def main():
    """Training-server entry point for distributed selfplay.

    Builds the training environment, seeds the C++ server with the
    loaded model's version, registers the train / train_ctrl / actor
    callbacks on the game context and hands control to the runner.
    """
    # Log interpreter/framework versions for reproducibility.
    print('Python version:', sys.version)
    print('PyTorch version:', torch.__version__)
    print('CUDA version', torch.version.cuda)
    print('Conda env:', os.environ.get("CONDA_DEFAULT_ENV", ""))

    # Components constructed from command-line options in addition to
    # the core env (game, model_loaders, sampler, mi, method).
    additional_to_load = {
        'trainer': (Trainer.get_option_spec(),
                    lambda option_map: Trainer(option_map)),
        'runner': (SingleProcessRun.get_option_spec(),
                   lambda option_map: SingleProcessRun(option_map)),
    }
    env = load_env(os.environ, additional_to_load=additional_to_load)

    trainer = env['trainer']
    runner = env['runner']
    GC = env["game"].initialize()

    model_loader = env["model_loaders"][0]
    model = model_loader.load_model(GC.params)
    # opt=True: attach an optimizer so this copy of the model can be trained.
    env["mi"].add_model("model", model, opt=True)

    keep_prev_selfplay = env["game"].options.keep_prev_selfplay

    # Recover the model version from a checkpoint file named
    # "save-<ver>.bin" (see module-level `matcher`); default to 0.
    model_ver = 0
    model_filename = model_loader.options.load
    if isinstance(model_filename, str) and model_filename != "":
        realpath = os.path.realpath(model_filename)
        m = matcher.match(os.path.basename(realpath))
        if m:
            model_ver = int(m.group(1))

    # A non-negative eval_old_model puts the server into evaluation mode
    # against that older version; otherwise just announce our version.
    eval_old_model = env["game"].options.eval_old_model

    if eval_old_model >= 0:
        GC.GC.getServer().setEvalMode(model_ver, eval_old_model)
    else:
        GC.GC.getServer().setInitialVersion(model_ver)

    # Mutated by train_ctrl (via nonlocal) whenever the server switches
    # to a new selfplay model version.
    selfplay_ver = model_ver
    root = os.environ["save"]
    print(f'Root: "{root}"')
    print(f'Keep prev_selfplay: {keep_prev_selfplay!s}')

    def train(batch, *args, **kwargs):
        # Check whether the version match.
        # Unless keep_prev_selfplay is set, drop batches generated by a
        # model version other than the current one.
        if keep_prev_selfplay or \
                (batch["selfplay_ver"] != selfplay_ver).sum() == 0:
            trainer.train(batch, *args, **kwargs)
        else:
            print(f'Get batch whose selfplay ver is different from '
                  f'{selfplay_ver}, skipping')
            # Skipped batches must not count toward the episode.
            runner.inc_episode_counter(-1)

    def train_ctrl(batch, *args, **kwargs):
        # Version-switch notification from the server: block until
        # enough selfplay for the new version exists, then reload the
        # matching checkpoint into the model interface.
        nonlocal selfplay_ver
        old_selfplay_ver = selfplay_ver
        selfplay_ver = int(batch["selfplay_ver"][0])
        print(
            f'Train ctrl: selfplay_ver: {old_selfplay_ver} -> {selfplay_ver}')

        GC.GC.getServer().waitForSufficientSelfplay(selfplay_ver)

        # Reload old models.
        real_path = os.path.join(root, "save-" + str(selfplay_ver) + ".bin")
        model_loader.options.load = real_path
        # The checkpoint may still be mid-write; retry until it loads.
        # NOTE(review): `except BaseException` also swallows
        # KeyboardInterrupt — confirm this is intended.
        while True:
            try:
                model = model_loader.load_model(GC.params)
                break
            except BaseException:
                time.sleep(10)

        env["mi"].remove_model("model")
        env["mi"].add_model("model", model, opt=True)
        trainer.episode_reset()
        runner.set_episode_counter(-1)

    GC.reg_callback("train", train)
    GC.reg_callback("train_ctrl", train_ctrl)
    if GC.reg_has_callback("actor"):
        args = env["game"].options
        # Separate actor copy of the model; args.gpu >= 0 enables CUDA.
        env["mi"].add_model("actor", model, copy=True,
                            cuda=(args.gpu >= 0), gpu_id=args.gpu)
        GC.reg_callback("actor", trainer.actor)

    trainer.setup(
        sampler=env["sampler"], mi=env["mi"], rl_method=env["method"])

    def episode_summary(i):
        nonlocal selfplay_ver
        ver = trainer.episode_summary(i)
        # This might block (when evaluation does not catch up with training).
        GC.GC.getServer().notifyNewVersion(selfplay_ver, ver)

    offline_training = (env["game"].options.mode == "offline_train")

    def after_start():
        # In online mode, wait for enough selfplay before training starts.
        nonlocal selfplay_ver
        if not offline_training:
            print("About to wait for sufficient selfplay")
            GC.GC.getServer().waitForSufficientSelfplay(selfplay_ver)

    runner.setup(GC, after_start=after_start,
                 episode_summary=episode_summary,
                 episode_start=trainer.episode_start)

    runner.run()
def main():
    """GTP console entry point: load an online model and serve commands.

    Prints environment info, builds a single-game online environment,
    loads the network into the shared ModelInterface, hooks the GTP
    console into the game-context callbacks, and loops until the
    console requests exit.
    """
    # Environment / framework versions, useful in session logs.
    print('Python version:', sys.version)
    print('PyTorch version:', torch.__version__)
    print('CUDA version', torch.version.cuda)
    print('Conda env:', os.environ.get("CONDA_DEFAULT_ENV", ""))

    extra_components = {
        'evaluator': (
            Evaluator.get_option_spec(),
            lambda object_map: Evaluator(object_map, stats=None)),
    }

    # Set game to online model.
    option_overrides = {
        'num_games': 1,
        'greedy': True,
        'T': 1,
        'model': 'online',
        'additional_labels': ['aug_code', 'move_idx'],
    }
    env = load_env(
        os.environ,
        overrides=option_overrides,
        additional_to_load=extra_components)
    evaluator = env['evaluator']

    game_ctx = env["game"].initialize()

    # Same network object serves both roles; both are put in eval mode.
    net = env["model_loaders"][0].load_model(game_ctx.params)
    model_iface = env['mi']
    model_iface.add_model("model", net)
    model_iface.add_model("actor", net)
    model_iface["model"].eval()
    model_iface["actor"].eval()

    gtp_console = GoConsoleGTP(game_ctx, evaluator)

    def on_human_move(batch):
        return gtp_console.prompt("", batch)

    def on_actor(batch):
        return gtp_console.actor(batch)

    def on_train(batch):
        gtp_console.prompt("DF Train> ", batch)

    evaluator.setup(sampler=env["sampler"], mi=model_iface)

    game_ctx.reg_callback_if_exists("actor_black", on_actor)
    game_ctx.reg_callback_if_exists("human_actor", on_human_move)
    game_ctx.reg_callback_if_exists("train", on_train)

    game_ctx.start()
    game_ctx.GC.getClient().setRequest(
        mi_step_ver := model_iface["actor"].step, -1,
        env['game'].options.resign_thres, -1)

    evaluator.episode_start(0)

    # Run until the console sets its exit flag (checked after each run,
    # matching the original run-then-test order).
    keep_running = True
    while keep_running:
        game_ctx.run()
        keep_running = not gtp_console.exit
    game_ctx.stop()
from console_lib import GoConsoleGTP if __name__ == '__main__': additional_to_load = { 'evaluator': ( Evaluator.get_option_spec(), lambda object_map: Evaluator(object_map, stats=None)), } # Set game to online model. env = load_env( os.environ, overrides=dict( num_games=1, greedy=True, T=1, model="online", additional_labels=['aug_code', 'move_idx'], ), additional_to_load=additional_to_load) evaluator = env['evaluator'] GC = env["game"].initialize() model_loader = env["model_loaders"][0] model = model_loader.load_model(GC.params) gpu = model_loader.options.gpu use_gpu = gpu is not None and gpu >= 0
#!/usr/bin/env python # -*- coding: utf-8 -*- import argparse from datetime import datetime import sys import os from rlpytorch import LSTMTrainer, Sampler, SingleProcessRun, load_env, ModelLoader, ArgsProvider, ModelInterface if __name__ == '__main__': trainer = LSTMTrainer() runner = SingleProcessRun() env, all_args = load_env(os.environ, trainer=trainer, runner=runner) GC = env["game"].initialize() model = env["model_loaders"][0].load_model(GC.params) mi = ModelInterface() mi.add_model("model", model, optim_params={ "lr" : 0.001}) mi.add_model("actor", model, copy=True, cuda=all_args.gpu is not None, gpu_id=all_args.gpu) trainer.setup(sampler=env["sampler"], mi=mi, rl_method=env["method"]) GC.reg_callback("train", trainer.train) GC.reg_callback("actor", trainer.actor) runner.setup(GC, episode_summary=trainer.episode_summary, episode_start=trainer.episode_start)
Trainer.get_option_spec(), lambda option_map: Trainer(option_map)), 'trainer1': ( Trainer.get_option_spec(), lambda option_map: Trainer(option_map)), 'mi0': ( ModelInterface.get_option_spec(), ModelInterface), 'mi1': ( ModelInterface.get_option_spec(), ModelInterface), 'runner': ( SingleProcessRun.get_option_spec(), lambda option_map: SingleProcessRun(option_map)), } env = load_env(os.environ, num_models=2, additional_to_load=additional_to_load, overrides=dict(backprop0=False, backprop1=False, mode="offline_train")) trainer0 = env['trainer0'] trainer1 = env['trainer1'] runner = env['runner'] GC = env["game"].initialize() for i in range(2): model_loader = env["model_loaders"][i] model = model_loader.load_model(GC.params) env["mi%d" % i].add_model("model", model) env["mi%d" % i]["model"].eval() model_ver = 0
def main():
    """Interactive Ugolki console: load a model and play via UgolkiConsole.

    Builds a single-game greedy environment, loads the policy network
    into the shared ModelInterface, wires the console's prompt and
    actor into the game-context callbacks, and loops until the console
    requests exit.
    """
    # Log interpreter/framework versions for reproducibility.
    print('Python version:', sys.version)
    print('PyTorch version:', torch.__version__)
    print('CUDA version', torch.version.cuda)
    print('Conda env:', os.environ.get("CONDA_DEFAULT_ENV", ""))

    """
    Class Evaluator is a pure python class,
    which run neural network in eval mode
    and get return results and update some stat info
    """
    additional_to_load = {
        'evaluator': (
            Evaluator.get_option_spec(),
            lambda object_map: Evaluator(object_map, stats=None)),
    }

    """
    load_env:
        game - load file game elfgames.checkers.game
        method - load "method" passed via params:
            file df_model_checkers.py
            return array with [model, method]
            model_file=elfgames.checkers.df_model_checkers model=df_pred
        model_loaders - prepare to load(returns instance of class ModelLoader)
            "model" passed via params:
            file df_model_checkers.py
            return array with [model, method]
            model_file=elfgames.checkers.df_model_checkers model=df_pred
        sampler - Used to sample an action from policy.
        mi - class ModelInterface is a python class saving network models.
            Its member models is a key-value store to call a CNN model
            by name.
        evaluator - run neural network in eval mode and get return
            results and update some stat info.
    """
    env = load_env(
        os.environ,
        overrides={
            'num_games': 1,
            'greedy': True,
            'T': 1,
            'additional_labels': ['aug_code', 'move_idx'],
        },
        additional_to_load=additional_to_load)
    evaluator = env['evaluator']

    """
    Initializes keys for communication Python and C++ code, defined in
    Game.py and GameFeature.h. Also, initializes GameContext from C++
    library wrapped by GC from python side + sets mode that parsed from
    options like play/selfplay/train/offline_train.
    """
    GC = env["game"].initialize()

    # Load model(use Model_PolicyValue from df_model_checkers.py)
    model_loader = env["model_loaders"][0]
    # Model contains init_conv, value_func, resnet and etc.
    model = model_loader.load_model(GC.params)

    """
    Pass our model in ModelInterface
    ModelInterface stores our saved model and call nn when we need eval
    """
    mi = env['mi']
    mi.add_model("actor", model)
    # Switch the network to inference mode (disables training-only
    # behavior such as dropout); also confirms the model loaded.
    mi["actor"].eval()

    # Console object that owns the human-facing prompt and move parsing.
    console = UgolkiConsole(GC, evaluator)

    def human_actor(batch):
        # Route the batch to the interactive prompt (human move).
        return console.prompt("", batch)

    def actor(batch):
        # Neural-network move selection for the machine side.
        return console.actor(batch)

    evaluator.setup(sampler=env["sampler"], mi=mi)

    """
    Register the methods in the GameContext on the python side.
    We registered their names earlier when the game was initialized
    (names were registered on the python and C++ sides).
    Now its a registration of methods that will be called when we try
    to pass batch on eval from C++ to Python.
    Example:
        We register "human_actor" as key and register the same method
        on the python side. When our AIClientT calls method act(it
        takes 2 parameters: state, and key) act connect to python and
        transmits the state by key("human_actor", "actor_black") to
        these methods
    """
    GC.reg_callback_if_exists("human_actor", human_actor)
    GC.reg_callback_if_exists("actor_black", actor)

    GC.start()
    # Tells the C++ side the model version.
    GC.GC.getClient().setRequest(
        mi["actor"].step, -1, -1)

    # Called before each episode, resets actor_count(num of total nn call)
    evaluator.episode_start(0)

    while True:
        GC.run()
        if console.exit:
            break
    # TODO(review): the original flags this exit path for cleanup; the
    # process currently relies on GC.stop() rather than sys.exit() —
    # confirm shutdown is clean.
    # sys.exit()
    GC.stop()
# Copyright (c) 2017-present, Facebook, Inc. # All rights reserved. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. # Console for DarkForest import sys import os from rlpytorch import load_env, Evaluator, ModelInterface, ArgsProvider, EvalIters if __name__ == '__main__': evaluator = Evaluator(stats=False) # Set game to online model. env, args = load_env(os.environ, evaluator=evaluator, overrides=dict(mode="selfplay", T=1)) GC = env["game"].initialize() model = env["model_loaders"][0].load_model(GC.params) mi = ModelInterface() mi.add_model("model", model) mi.add_model("actor", model, copy=True, cuda=args.gpu is not None, gpu_id=args.gpu) mi["model"].eval() mi["actor"].eval() evaluator.setup(mi=mi) total_batchsize = 0 total_sel_batchsize = 0 def actor(batch): global total_batchsize, total_sel_batchsize
def main():
    """Training-server entry point for elfgames.checkers.

    Mirrors the Go training server: loads the model, seeds the C++
    server with its version, registers the train / train_ctrl / actor
    callbacks on the game context and runs the training loop.
    """
    # Log interpreter/framework versions for reproducibility.
    print('Python version:', sys.version)
    print('PyTorch version:', torch.__version__)
    print('CUDA version', torch.version.cuda)
    print('Conda env:', os.environ.get("CONDA_DEFAULT_ENV", ""))

    # Prefixed, ANSI-colorized logger for this module.
    logger = logging.getIndexedLogger(
        '\u001b[31;1m|py|\u001b[0melfgames.checkers.train-', '')

    # Trainer is a pure python class wrapped around the evaluator; it
    # trains the models.  Runner drives the episode loop.
    additional_to_load = {
        'trainer': (Trainer.get_option_spec(),
                    lambda option_map: Trainer(option_map)),
        'runner': (SingleProcessRun.get_option_spec(),
                   lambda option_map: SingleProcessRun(option_map)),
    }
    env = load_env(os.environ, additional_to_load=additional_to_load)

    trainer = env['trainer']
    runner = env['runner']

    """
    Initializes keys('train', 'train_ctrl') for communication Python
    and C++ code, defined in Game.py and GameFeature.h. Also,
    initializes GameContext from C++ library wrapped by GC from python
    side + sets mode that parsed from options like
    play/selfplay/train/offline_train.
    """
    GC = env["game"].initialize()

    model_loader = env["model_loaders"][0]
    model = model_loader.load_model(GC.params)
    # opt=True: attach an optimizer so this copy of the model trains.
    env["mi"].add_model("model", model, opt=True)

    keep_prev_selfplay = env["game"].options.keep_prev_selfplay

    model_ver = 0
    # Load the model version from the checkpoint name, if one was given
    # ("save-<ver>.bin"; see module-level `matcher`).
    model_filename = model_loader.options.load
    if isinstance(model_filename, str) and model_filename != "":
        realpath = os.path.realpath(model_filename)
        m = matcher.match(os.path.basename(realpath))
        if m:
            model_ver = int(m.group(1))

    eval_old_model = env["game"].options.eval_old_model

    # A non-negative eval_old_model switches the server into evaluation
    # mode against that older version; otherwise announce our version.
    if eval_old_model >= 0:
        GC.GC.getServer().setEvalMode(model_ver, eval_old_model)
    else:
        GC.GC.getServer().setInitialVersion(model_ver)

    # Mutated by train_ctrl (via nonlocal) on every version switch.
    checkers_selfplay_ver = model_ver
    root = os.environ["save"]
    print(f'Save models in\t\t: "{root}"')
    print(f'Keep prev_selfplay\t: {keep_prev_selfplay!s}')

    def train(batch, *args, **kwargs):
        # Check whether the version match.
        # Unless keep_prev_selfplay is set, drop batches generated by a
        # model version other than the current one.
        if keep_prev_selfplay or \
                (batch["checkers_selfplay_ver"] != checkers_selfplay_ver).sum() == 0:
            trainer.train(batch, *args, **kwargs)
        else:
            print(f'Get batch whose selfplay ver is different from '
                  f'{checkers_selfplay_ver}, skipping')
            # The skipped batch must not count toward the episode.
            runner.inc_episode_counter(-1)

    def train_ctrl(batch, *args, **kwargs):
        nonlocal checkers_selfplay_ver
        old_selfplay_ver = checkers_selfplay_ver
        checkers_selfplay_ver = int(batch["checkers_selfplay_ver"][0])
        logger.info(
            f'Train ctrl: checkers_selfplay_ver: {old_selfplay_ver} -> {checkers_selfplay_ver}'
        )
        # Wait until clients have produced a properly filled batch of
        # selfplay for the new version.
        GC.GC.getServer().ServerWaitForSufficientSelfplay(
            checkers_selfplay_ver)

        # Reload old models.
        real_path = os.path.join(root,
                                 "save-" + str(checkers_selfplay_ver) + ".bin")
        model_loader.options.load = real_path
        # The checkpoint may still be mid-write; retry until it loads.
        # NOTE(review): `except BaseException` also swallows
        # KeyboardInterrupt — confirm this is intended.
        while True:
            try:
                model = model_loader.load_model(GC.params)
                break
            except BaseException:
                time.sleep(10)

        env["mi"].remove_model("model")
        env["mi"].add_model("model", model, opt=True)
        trainer.episode_reset()
        runner.set_episode_counter(-1)

    GC.reg_callback("train", train)
    GC.reg_callback("train_ctrl", train_ctrl)
    if GC.reg_has_callback("actor"):
        args = env["game"].options
        # Separate actor copy of the model; args.gpu >= 0 enables CUDA.
        env["mi"].add_model("actor", model, copy=True,
                            cuda=(args.gpu >= 0), gpu_id=args.gpu)
        GC.reg_callback("actor", trainer.actor)

    trainer.setup(
        sampler=env["sampler"],
        mi=env["mi"],
        rl_method=env["method"])

    def episode_summary(i):
        nonlocal checkers_selfplay_ver
        logger.info("Episode_summary")
        ver = trainer.episode_summary(i)
        # This might block (when evaluation does not catch up with training).
        GC.GC.getServer().notifyNewVersion(checkers_selfplay_ver, ver)

    offline_training = (env["game"].options.mode == "offline_train")

    def after_start():
        # In online mode, wait for enough selfplay before training starts.
        logger.info("after_start")
        nonlocal checkers_selfplay_ver
        if not offline_training:
            GC.GC.getServer().ServerWaitForSufficientSelfplay(
                checkers_selfplay_ver)

    runner.setup(GC, after_start=after_start,
                 episode_summary=episode_summary,
                 episode_start=trainer.episode_start)

    # NOTE(review): sibling scripts in this tree call `runner.run()`;
    # confirm `run_singe_process` ("singe") is the actual project API
    # name and not a typo for `run_single_process`.
    runner.run_singe_process()
from additional import boardToJson from rlpytorch import Evaluator, load_env from py.UgolkiMoves import get_all_moves_ugolki from flask import Flask, session, redirect, url_for, request, render_template import json additional_to_load = { 'evaluator': (Evaluator.get_option_spec(), lambda object_map: Evaluator(object_map, stats=None)), } env = load_env(os.environ, overrides={ 'num_games': 1, 'greedy': True, 'T': 1, 'additional_labels': ['aug_code', 'move_idx'], }, additional_to_load=additional_to_load) all_session = {} moves_for_human = get_all_moves_ugolki() def init_observation(player_id): global env evaluator = env['evaluator'] GC = env["game"].initialize() model = env["model_loaders"][0].load_model(GC.params)