def main():
    print('Python version:', sys.version)
    print('PyTorch version:', torch.__version__)
    print('CUDA version:', torch.version.cuda)
    print('Conda env:', os.environ.get("CONDA_DEFAULT_ENV", ""))

    # Set game to online model.
    actors = ["actor"]
    additional_to_load = {
        ("eval_" + actor_name): (
            Evaluator.get_option_spec(name="eval_" + actor_name),
            lambda object_map, actor_name=actor_name: Evaluator(
                object_map, name="eval_" + actor_name,
                actor_name=actor_name, stats=None))
        for actor_name in actors
    }
    additional_to_load.update({
        ("mi_" + name): (ModelInterface.get_option_spec(), ModelInterface)
        for name in actors
    })

    env = load_env(
        os.environ, num_models=1, additional_to_load=additional_to_load)

    GC = env["game"].initialize()
    args = env["game"].options
    model = env["model_loaders"][0].load_model(GC.params)

    for actor_name in actors:
        e = env["eval_" + actor_name]
        mi = env["mi_" + actor_name]
        mi.add_model("actor", model, cuda=(args.gpu >= 0), gpu_id=args.gpu)
        print("register " + actor_name + " for e = " + str(e))
        e.setup(sampler=env["sampler"], mi=mi)

        def actor(batch, e):
            return e.actor(batch)

        # Bind the current evaluator via a default argument; a bare closure
        # would capture the loop variable by reference (late binding).
        GC.reg_callback(actor_name, lambda batch, e=e: actor(batch, e))

    GC.start()

    for actor_name in actors:
        env["eval_" + actor_name].episode_start(0)

    while True:
        GC.run()

    GC.stop()
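# ---------------------------------------------------------------------------
# Illustration (not part of the client above): why the registration loops in
# these scripts bind loop variables through lambda default arguments. A bare
# closure captures the variable itself, so after the loop finishes every
# callback would see the final value (late binding); a default argument
# freezes the value at definition time.
names = ["actor_black", "actor_white"]
late = [lambda: n for n in names]        # all closures share the same `n`
bound = [lambda n=n: n for n in names]   # each closure freezes its own `n`

assert [f() for f in late] == ["actor_white", "actor_white"]
assert [f() for f in bound] == ["actor_black", "actor_white"]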
def main_loop(self):
    evaluator = Evaluator(stats=False)
    # Set game to online model.
    env, args = load_env(
        os.environ, evaluator=evaluator,
        overrides=dict(
            num_games=1, batchsize=1, num_games_per_thread=1,
            greedy=True, T=1, additional_labels="aug_code,move_idx"))

    GC = env["game"].initialize()
    model = env["model_loaders"][0].load_model(GC.params)

    mi = ModelInterface()
    mi.add_model("model", model)
    mi.add_model("actor", model, copy=True,
                 cuda=args.gpu is not None, gpu_id=args.gpu)
    mi["model"].eval()
    mi["actor"].eval()

    self.evaluator = evaluator
    self.last_move_idx = None

    def human_actor(batch):
        print("In human_actor")
        return self.prompt("DF> ", batch)

    def actor(batch):
        return self.actor(batch)

    def train(batch):
        self.prompt("DF Train> ", batch)

    evaluator.setup(sampler=env["sampler"], mi=mi)

    GC.reg_callback_if_exists("actor", actor)
    GC.reg_callback_if_exists("human_actor", human_actor)
    GC.reg_callback_if_exists("train", train)

    GC.Start()
    evaluator.episode_start(0)

    while True:
        GC.Run()
        if self.exit:
            break

    GC.Stop()
def main():
    # Set game to online model.
    actors = ["actor_black", "actor_white"]
    additional_to_load = {
        ("eval_" + actor_name): (
            Evaluator.get_option_spec(name="eval_" + actor_name),
            lambda object_map, actor_name=actor_name: Evaluator(
                object_map, name="eval_" + actor_name,
                actor_name=actor_name, stats=None))
        for actor_name in actors
    }
    additional_to_load.update({
        ("mi_" + name): (ModelInterface.get_option_spec(), ModelInterface)
        for name in actors
    })
    env = load_env(
        os.environ, num_models=2, overrides=dict(actor_only=True),
        additional_to_load=additional_to_load)

    GC = env["game"].initialize()

    stats = [Stats(), Stats()]

    for actor_name, stat in zip(actors, stats):
        e = env["eval_" + actor_name]
        print("register " + actor_name + " for e = " + str(e))
        e.setup(sampler=env["sampler"], mi=env["mi_" + actor_name])

        def actor(batch, e, stat):
            reply = e.actor(batch)
            stat.feed(batch)
            return reply

        GC.reg_callback(
            actor_name,
            lambda batch, e=e, stat=stat: actor(batch, e, stat))

    root = os.environ.get("root", "./")
    print("Root: \"%s\"" % root)
    args = env["game"].options

    global loop_end
    loop_end = False

    def game_start(batch):
        print("In game start")
        vers = [int(batch["black_ver"][0]), int(batch["white_ver"][0])]

        # Use the version number to load models.
        for model_loader, ver, actor_name in zip(
                env["model_loaders"], vers, actors):
            if ver >= 0:
                while True:
                    try:
                        reload(env["mi_" + actor_name], model_loader,
                               GC.params, args, root, ver, actor_name)
                        break
                    except BaseException:
                        import traceback
                        traceback.print_exc()
                        time.sleep(10)

    def game_end(batch):
        global loop_end
        wr = batch.GC.getGameStats().getWinRateStats()
        win_rate = (100.0 * wr.black_wins / wr.total_games
                    if wr.total_games > 0 else 0.0)
        print("%s B/W: %d/%d. Black winrate: %.2f (%d)" %
              (str(datetime.now()), wr.black_wins, wr.white_wins,
               win_rate, wr.total_games))
        if args.suicide_after_n_games > 0 and \
                wr.total_games >= args.suicide_after_n_games:
            print("#suicide_after_n_games: %d, total_games: %d" %
                  (args.suicide_after_n_games, wr.total_games))
            loop_end = True

    GC.reg_callback_if_exists("game_start", game_start)
    GC.reg_callback_if_exists("game_end", game_end)

    GC.start()

    if args.eval_model_pair:
        if args.eval_model_pair.find(",") >= 0:
            black, white = args.eval_model_pair.split(",")
        else:
            black = extract_ver(env["model_loaders"][0])
            white = extract_ver(env["model_loaders"][1])
            # Force them to reload in the future.
            for model_loader, actor_name in zip(env["model_loaders"], actors):
                reload_model(model_loader, GC.params,
                             env["mi_" + actor_name], actor_name, args)

        # We just use one thread to do selfplay.
        GC.GC.setRequest(int(black), int(white),
                         env['game'].options.resign_thres, 1)

    for actor_name in actors:
        env["eval_" + actor_name].episode_start(0)

    while not loop_end:
        GC.run()

    GC.stop()
def main():
    print('Python version:', sys.version)
    print('PyTorch version:', torch.__version__)
    print('CUDA version:', torch.version.cuda)
    print('Conda env:', os.environ.get("CONDA_DEFAULT_ENV", ""))

    additional_to_load = {
        'evaluator': (
            Evaluator.get_option_spec(),
            lambda object_map: Evaluator(object_map, stats=None)),
    }

    # Set game to online model.
    env = load_env(
        os.environ,
        overrides={
            'num_games': 1,
            'greedy': True,
            'T': 1,
            'model': 'online',
            'additional_labels': ['aug_code', 'move_idx'],
        },
        additional_to_load=additional_to_load)

    evaluator = env['evaluator']

    GC = env["game"].initialize()

    model_loader = env["model_loaders"][0]
    model = model_loader.load_model(GC.params)

    mi = env['mi']
    mi.add_model("model", model)
    mi.add_model("actor", model)
    mi["model"].eval()
    mi["actor"].eval()

    console = GoConsoleGTP(GC, evaluator)

    def human_actor(batch):
        return console.prompt("", batch)

    def actor(batch):
        return console.actor(batch)

    def train(batch):
        console.prompt("DF Train> ", batch)

    evaluator.setup(sampler=env["sampler"], mi=mi)

    GC.reg_callback_if_exists("actor_black", actor)
    GC.reg_callback_if_exists("human_actor", human_actor)
    GC.reg_callback_if_exists("train", train)

    GC.start()
    GC.GC.getClient().setRequest(
        mi["actor"].step, -1, env['game'].options.resign_thres, -1)

    evaluator.episode_start(0)

    while True:
        GC.run()
        if console.exit:
            break

    GC.stop()
def main():
    print('Python version:', sys.version)
    print('PyTorch version:', torch.__version__)
    print('CUDA version:', torch.version.cuda)
    print('Conda env:', os.environ.get("CONDA_DEFAULT_ENV", ""))

    additional_to_load = {
        'evaluator': (
            Evaluator.get_option_spec(),
            lambda object_map: Evaluator(object_map, stats=None)),
        'console': (
            GoConsoleGTP.get_option_spec(),
            lambda object_map: GoConsoleGTP(object_map))
    }

    # Set game to online model.
    env = load_env(
        os.environ,
        overrides=dict(additional_labels=['aug_code', 'move_idx']),
        additional_to_load=additional_to_load)

    evaluator = env['evaluator']

    GC = env["game"].initialize()
    console = env["console"]

    model_loader = env["model_loaders"][0]
    model = model_loader.load_model(GC.params)
    gpu = model_loader.options.gpu
    use_gpu = gpu is not None and gpu >= 0

    mi = env['mi']
    mi.add_model("model", model)
    # An alternative that clones the model onto the GPU:
    # mi.add_model("actor", model, copy=True, cuda=use_gpu, gpu_id=gpu)
    mi.add_model("actor", model)
    mi["model"].eval()
    mi["actor"].eval()

    console.setup(GC, evaluator)

    def human_actor(batch):
        return console.prompt("", batch)

    def actor(batch):
        return console.actor(batch)

    def train(batch):
        console.prompt("DF Train> ", batch)

    evaluator.setup(sampler=env["sampler"], mi=mi)

    GC.reg_callback_if_exists("actor_black", actor)
    GC.reg_callback_if_exists("human_actor", human_actor)
    GC.reg_callback_if_exists("train", train)

    GC.start()
    # TODO: For now fixed resign threshold to be 0.05. Will add a switch.
    GC.game_obj.setRequest(mi["actor"].step, -1, 0.05, -1)

    evaluator.episode_start(0)

    while True:
        GC.run()
        if console.exit:
            break

    GC.stop()
import gevent
import grpc
import os
import signal
import sys
import threading
import traceback
from time import sleep

import game_pb2
import game_pb2_grpc

from rlpytorch import Evaluator

# Load env
additional_to_load = {
    'evaluator': (
        Evaluator.get_option_spec(),
        lambda object_map: Evaluator(object_map, stats=None)),
}

# Set os environment
os.environ.update({
    'game': 'elfgames.go.game',
    'model': 'df_pred',
    'model_file': 'elfgames.go.df_model3',
})

overrides = {
    'num_games': 1,
    'greedy': True,
    'T': 1,
    'model': 'online',
    'additional_labels': ['aug_code', 'move_idx'],
}
import argparse
import os
import sys

from rlpytorch import (ModelLoader, load_module, Sampler, Evaluator,
                       ModelInterface, ArgsProvider, EvalIters)

if __name__ == '__main__':
    parser = argparse.ArgumentParser()

    model_file = load_module(os.environ["model_file"])
    model_class, method_class = model_file.Models[os.environ["model"]]

    model_loader = ModelLoader(model_class)

    game = load_module(os.environ["game"]).Loader()
    game.args.set_override(actor_only=True, game_multi=2)

    sampler = Sampler()
    evaluator = Evaluator(stats=False)
    eval_iters = EvalIters()

    args = ArgsProvider.Load(parser, [
        game, sampler, evaluator, model_loader, eval_iters
    ])

    GC = game.initialize()
    GC.setup_gpu(args.gpu)

    model = model_loader.load_model(GC.params)
    mi = ModelInterface()
    mi.add_model("model", model, optim_params={"lr": 0.001})
    mi.add_model("actor", model, copy=True, cuda=True, gpu_id=args.gpu)

    def actor(batch):
        reply = evaluator.actor(batch)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2017-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from datetime import datetime
import sys
import os

from rlpytorch import load_env, Evaluator, ArgsProvider, EvalIters

if __name__ == '__main__':
    evaluator = Evaluator(stats=False)
    eval_iters = EvalIters()

    env, args = load_env(
        os.environ, overrides=dict(actor_only=True),
        eval_iters=eval_iters, evaluator=evaluator)

    GC = env["game"].initialize_reduced_service()
    model = env["model_loaders"][0].load_model(GC.params)
    mi = env["mi"]
    mi.add_model("actor", model,
                 cuda=args.gpu is not None, gpu_id=args.gpu)

    def reduced_project(batch):
        output = mi["actor"].forward(batch.hist(0))
        eval_iters.stats.feed_batch(batch)
        return dict(reduced_s=output["h"].data)

    def reduced_forward(batch):
# Copyright (c) 2017-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Console for DarkForest

import sys
import os

from rlpytorch import load_env, Evaluator, ModelInterface, ArgsProvider, EvalIters

if __name__ == '__main__':
    evaluator = Evaluator(stats=False)

    # Set game to online model.
    env, args = load_env(
        os.environ, evaluator=evaluator,
        overrides=dict(mode="selfplay", T=1))

    GC = env["game"].initialize()
    model = env["model_loaders"][0].load_model(GC.params)

    mi = ModelInterface()
    mi.add_model("model", model)
    mi.add_model("actor", model, copy=True,
                 cuda=args.gpu is not None, gpu_id=args.gpu)
    mi["model"].eval()
    mi["actor"].eval()

    evaluator.setup(mi=mi)

    total_batchsize = 0
    total_sel_batchsize = 0

    def actor(batch):
        global total_batchsize, total_sel_batchsize
# Copyright (c) 2017-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Console for DarkForest

import os

from rlpytorch import Evaluator, load_env
from console_lib import GoConsoleGTP

if __name__ == '__main__':
    additional_to_load = {
        'evaluator': (
            Evaluator.get_option_spec(),
            lambda object_map: Evaluator(object_map, stats=None)),
    }

    # Set game to online model.
    env = load_env(
        os.environ,
        overrides=dict(
            num_games=1,
            greedy=True,
            T=1,
            model="online",
            additional_labels=['aug_code', 'move_idx'],
        ),
        additional_to_load=additional_to_load)
def main():
    address = addrs['game_server']
    if address != "":
        channel = grpc.insecure_channel(address + ':50051')
    else:
        channel = grpc.insecure_channel("localhost:50051")
    stub = play_pb2_grpc.TurnStub(channel)

    print('Python version:', sys.version)
    print('PyTorch version:', torch.__version__)
    print('CUDA version:', torch.version.cuda)
    print('Conda env:', os.environ.get("CONDA_DEFAULT_ENV", ""))

    additional_to_load = {
        'evaluator': (
            Evaluator.get_option_spec(),
            lambda object_map: Evaluator(object_map, stats=None)),
    }

    # Set game to online model.
    env = load_env(
        os.environ,
        overrides={
            'num_games': 1,
            'greedy': True,
            'T': 1,
            'model': 'online',
            'additional_labels': ['aug_code', 'move_idx'],
        },
        additional_to_load=additional_to_load)

    evaluator = env['evaluator']
    GC = env["game"].initialize()

    model_loader = env["model_loaders"][0]
    model = model_loader.load_model(GC.params)

    mi = env['mi']
    mi.add_model("model", model)
    mi.add_model("actor", model)
    mi["model"].eval()
    mi["actor"].eval()

    console = GoConsoleGTP(GC, evaluator)

    # TODO: create an instance of game when the client sends a request.

    def reset():
        ID = stub.NewRoom(play_pb2.State(status=True)).ID
        console.ID = ID
        console.color = {'has_chosen': False, "client": 1, "AI": 2}
        console.prev_player = 0
        print("Current AI's ID is ", console.ID)

        if not console.color["has_chosen"]:
            # Block until the human player has picked a color.
            while not stub.HasChosen(play_pb2.State(status=True, ID=ID)).status:
                pass
            console.color["AI"] = stub.GetAIPlayer(
                play_pb2.State(status=True, ID=ID)).color
            console.color["client"] = console.color["AI"] % 2 + 1
            console.color["has_chosen"] = True

        console.res_arr = stub.GetResumed(
            play_pb2.State(status=True, ID=ID)).move
        console.res_len = len(console.res_arr)
        if console.res_len > 0 and console.res_arr[-1][0].upper() == "B":
            _ = stub.UpdateNext(play_pb2.State(status=True, ID=ID))

    reset()

    def check_reset(reply):
        console.reset = stub.CheckExit(
            play_pb2.State(status=True, ID=console.ID)).status
        if console.reset:
            print("\n\n\nRestarting game...\n\n\n")
            reset()
            console.reset = False
            reply["a"] = console.actions["clear"]
            return True, reply
        return False, reply

    def human_actor(batch):
        reply = dict(pi=None, a=None, V=0)
        ID = console.ID
        AI_color = console.color["AI"]
        human_color = console.color["client"]

        # Replay any resumed moves first.
        if console.res_len > 0:
            reply["a"] = console.str2action(console.res_arr[-console.res_len])
            console.res_len -= 1
            return reply

        while True:
            if console.prev_player == 1:
                move = console.get_last_move(batch)
                x, y = move2xy(move)
                _ = stub.SetMove(play_pb2.Step(
                    x=x, y=y,
                    player=play_pb2.Player(color=AI_color, ID=ID)))
                _ = stub.UpdateNext(play_pb2.State(status=True, ID=ID))

            if stub.IsNextPlayer(play_pb2.Player(color=AI_color, ID=ID)).status:
                reply["a"] = console.actions["skip"]
                console.prev_player = 1
                return reply

            # Wait while it is still the human's turn (i.e., until the human
            # has submitted a move), honoring reset requests in the meantime.
            while stub.IsNextPlayer(
                    play_pb2.Player(color=human_color, ID=ID)).status:
                do_reset, reply = check_reset(reply)
                if do_reset:
                    return reply

            human_xy = stub.GetMove(play_pb2.Player(color=human_color, ID=ID))
            reply["a"] = console.move2action(xy2move(human_xy.x, human_xy.y))
            console.prev_player = 2
            return reply

    def actor(batch):
        return console.actor(batch)

    def train(batch):
        console.prompt("DF Train> ", batch)

    evaluator.setup(sampler=env["sampler"], mi=mi)

    GC.reg_callback_if_exists("actor_black", actor)
    GC.reg_callback_if_exists("human_actor", human_actor)
    GC.reg_callback_if_exists("train", train)

    GC.start()
    GC.GC.getClient().setRequest(
        mi["actor"].step, -1, env['game'].options.resign_thres, -1)

    evaluator.episode_start(0)

    while True:
        GC.run()
        if console.exit:
            break

    GC.stop()
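# ---------------------------------------------------------------------------
# Sketch (not part of the client above): the loops polling HasChosen and
# IsNextPlayer spin at full speed on one CPU core. A small helper that sleeps
# between checks would behave the same while idling cheaply; `poll_interval`
# and `timeout` are illustrative parameters, not part of the original code.
import time

def wait_until(predicate, poll_interval=0.1, timeout=None):
    """Poll `predicate` until it returns True, sleeping between checks."""
    start = time.monotonic()
    while not predicate():
        if timeout is not None and time.monotonic() - start > timeout:
            raise TimeoutError("condition not met in time")
        time.sleep(poll_interval)

# e.g.:
# wait_until(lambda: stub.HasChosen(play_pb2.State(status=True, ID=ID)).status)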
def main():
    print('Python version:', sys.version)
    print('PyTorch version:', torch.__version__)
    print('CUDA version:', torch.version.cuda)
    print('Conda env:', os.environ.get("CONDA_DEFAULT_ENV", ""))

    # Register player names.
    actors = ["actor_white", "actor_black"]

    """
    Evaluator is a pure Python class which runs the neural network in eval
    mode, returns the results, and updates some stat info.
    This creates 'eval_actor_white' and 'eval_actor_black'.
    """
    additional_to_load = {
        ("eval_" + actor_name): (
            Evaluator.get_option_spec(name="eval_" + actor_name),
            lambda object_map, actor_name=actor_name: Evaluator(
                object_map, name="eval_" + actor_name,
                actor_name=actor_name, stats=None))
        for actor_name in actors
    }

    """
    ModelInterface is a Python class holding network models. Its member
    `models` is a key-value store used to look up a CNN model by name.
    This creates 'mi_actor_white' and 'mi_actor_black'.
    """
    additional_to_load.update({
        ("mi_" + name): (ModelInterface.get_option_spec(), ModelInterface)
        for name in actors
    })

    """
    load_env returns:
    game - the game module, elfgames.american_checkers.game.
    method - the "method" selected via params, i.e. the file
        model_american_checkers.py returns [model, method] for
        model_file=elfgames.american_checkers.model_american_checkers,
        model=df_pred.
    model_loaders - instances of ModelLoader, prepared to load the "model"
        selected via the same params.
    sampler - used to sample an action from a policy.
    mi - ModelInterface (see above).
    eval_* - run the neural network in eval mode, return the results,
        and update stat info.
    """
    env = load_env(
        os.environ, num_models=2, overrides={'actor_only': True},
        additional_to_load=additional_to_load)

    """
    Initializes the keys ('game_end', 'game_start', 'actor_white',
    'actor_black') used for communication between the Python and C++ code,
    defined in Game.py and GameFeature.h. Also initializes the C++
    GameContext, wrapped by GC on the Python side, and sets the mode parsed
    from the options (play/selfplay/train/offline_train).
    """
    GC = env["game"].initialize()

    """
    Register the callbacks in the GameContext on the Python side. Their
    names were registered earlier, on both the Python and C++ sides, when
    the game was initialized. This binds the methods that are called when
    C++ passes a batch to Python for evaluation. For example, we register
    "actor_black" as a key together with the actor() function defined
    below; when AIClientT calls act() (which takes two parameters, a state
    and a key), act() connects to Python and forwards the state under that
    key to the bound function.
    """
    # Some statistics about batch usage; more game stats can be added here.
    stats = [Stats(), Stats()]

    for actor_name, stat in zip(actors, stats):
        evaluator = env["eval_" + actor_name]
        evaluator.setup(sampler=env["sampler"], mi=env["mi_" + actor_name])

        def actor(batch, evaluator, stat):
            reply = evaluator.actor(batch)
            stat.feed(batch)
            return reply

        # Bind the loop variables via lambda default arguments.
        GC.reg_callback(
            actor_name,
            lambda batch, evaluator=evaluator, stat=stat: actor(
                batch, evaluator, stat))

    # Get the directory containing the models.
    root = os.environ.get("root", "./")
    args = env["game"].options

    # Stops the client after N games; see --suicide_after_n_games.
    loop_end = False

    """
    Reloads the model to the current one (received from the server) after
    a game starts. Called via the 'game_start' key from the C++ side.
    """
    def game_start(batch):
        logger.info("game_start() load/reload models")
        vers = [int(batch["white_ver"][0]), int(batch["black_ver"][0])]

        # Use the version number to load models.
        for model_loader, ver, actor_name in zip(
                env["model_loaders"], vers, actors):
            if ver >= 0:
                while True:
                    try:
                        reload(env["mi_" + actor_name], model_loader,
                               GC.params, args, root, ver, actor_name)
                        break
                    except BaseException:
                        import traceback
                        traceback.print_exc()
                        time.sleep(10)

    """
    Prints game statistics and stops the client after N games (loop_end).
    Called via the 'game_end' key from the C++ side.
    """
    def game_end(batch):
        nonlocal loop_end
        wr = batch.GC.getClient().getGameStats().getWinRateStats()
        win_rate = (100.0 * wr.black_wins / (wr.black_wins + wr.white_wins)
                    if (wr.black_wins + wr.white_wins) > 0 else 0.0)
        info = f'game_end()\tB/W: {wr.black_wins}/{wr.white_wins}, '
        info += f'Draw: {wr.both_lost}, '
        info += f'Black winrate: {win_rate:.2f}, '
        info += f'Total Games: {wr.total_games}'
        logger.info(info)

        if args.suicide_after_n_games > 0 and \
                wr.total_games >= args.suicide_after_n_games:
            info = f'game_end()\tTotal Games: {wr.total_games}, '
            info += f'#suicide_after_n_games: {args.suicide_after_n_games}'
            logger.info(info)
            loop_end = True

    # Register the callbacks described above in Python's GameContext.
    GC.reg_callback_if_exists("game_start", game_start)
    GC.reg_callback_if_exists("game_end", game_end)

    GC.start()

    """
    Given --eval_model_pair, load two models from file and pass the model
    versions to the C++ side for evaluation.
    """
    if args.eval_model_pair:
        if args.eval_model_pair.find(",") >= 0:
            black, white = args.eval_model_pair.split(",")
        else:
            black = extract_ver(env["model_loaders"][0])
            white = extract_ver(env["model_loaders"][1])
            # Force them to reload in the future.
            for model_loader, actor_name in zip(env["model_loaders"], actors):
                reload_model(model_loader, GC.params,
                             env["mi_" + actor_name], actor_name, args)

        # We just use one thread to do selfplay.
        GC.GC.getClient().setRequest(int(black), int(white), 1)

    # Called before each episode; resets actor_count (number of NN calls).
    for actor_name in actors:
        env["eval_" + actor_name].episode_start(0)

    while not loop_end:
        GC.run()

    GC.stop()
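# ---------------------------------------------------------------------------
# Stripped-down illustration (not ELF code) of the batch->reply contract the
# docstrings above describe: C++ collects states into a batch, invokes the
# Python function registered under a key, and consumes the returned reply.
# All names here are hypothetical.
def fake_actor(batch):
    # `batch` carries stacked game states; the reply must provide an action
    # (and optionally pi/V) for every entry in the batch.
    batchsize = len(batch["s"])
    return dict(a=[0] * batchsize)  # e.g. always play action 0

registry = {}

def reg_callback(key, fn):
    registry[key] = fn

reg_callback("actor_black", fake_actor)
# C++ side, conceptually: reply = registry["actor_black"](batch)
print(registry["actor_black"]({"s": [None, None]}))  # -> {'a': [0, 0]}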
def main():
    print('Python version:', sys.version)
    print('PyTorch version:', torch.__version__)
    print('CUDA version:', torch.version.cuda)
    print('Conda env:', os.environ.get("CONDA_DEFAULT_ENV", ""))

    """
    Evaluator is a pure Python class which runs the neural network in eval
    mode, returns the results, and updates some stat info.
    """
    additional_to_load = {
        'evaluator': (
            Evaluator.get_option_spec(),
            lambda object_map: Evaluator(object_map, stats=None)),
    }

    """
    load_env returns:
    game - the game module, elfgames.checkers.game.
    method - the "method" selected via params, i.e. the file
        df_model_checkers.py returns [model, method] for
        model_file=elfgames.checkers.df_model_checkers, model=df_pred.
    model_loaders - instances of ModelLoader, prepared to load the "model"
        selected via the same params.
    sampler - used to sample an action from a policy.
    mi - ModelInterface, a Python class holding network models; its member
        `models` is a key-value store used to look up a CNN model by name.
    evaluator - runs the neural network in eval mode, returns the results,
        and updates stat info.
    """
    env = load_env(
        os.environ,
        overrides={
            'num_games': 1,
            'greedy': True,
            'T': 1,
            'additional_labels': ['aug_code', 'move_idx'],
        },
        additional_to_load=additional_to_load)

    evaluator = env['evaluator']

    """
    Initializes the keys used for communication between the Python and C++
    code, defined in Game.py and GameFeature.h. Also initializes the C++
    GameContext, wrapped by GC on the Python side, and sets the mode parsed
    from the options (play/selfplay/train/offline_train).
    """
    GC = env["game"].initialize()

    # Load the model (Model_PolicyValue from df_model_checkers.py).
    model_loader = env["model_loaders"][0]
    # The model contains init_conv, value_func, the resnet, etc.
    model = model_loader.load_model(GC.params)

    """
    Pass our model to ModelInterface. ModelInterface stores the saved model
    and calls the network whenever an eval is needed.
    """
    mi = env['mi']
    mi.add_model("actor", model)
    # Switch the model to eval mode.
    mi["actor"].eval()

    console = UgolkiConsole(GC, evaluator)

    def human_actor(batch):
        return console.prompt("", batch)

    def actor(batch):
        return console.actor(batch)

    evaluator.setup(sampler=env["sampler"], mi=mi)

    """
    Register the callbacks in the GameContext on the Python side. Their
    names were registered earlier, on both the Python and C++ sides, when
    the game was initialized. This binds the methods that are called when
    C++ passes a batch to Python for evaluation. For example, we register
    "human_actor" as a key together with human_actor() defined above; when
    AIClientT calls act() (which takes two parameters, a state and a key),
    act() connects to Python and forwards the state under that key
    ("human_actor", "actor_black") to the bound function.
    """
    GC.reg_callback_if_exists("human_actor", human_actor)
    GC.reg_callback_if_exists("actor_black", actor)

    GC.start()
    # Tell the C++ side the model version.
    GC.GC.getClient().setRequest(mi["actor"].step, -1, -1)

    # Called before each episode; resets actor_count (number of NN calls).
    evaluator.episode_start(0)

    while True:
        GC.run()
        if console.exit:
            # TODO: exit cleanly instead of just breaking out.
            break

    GC.stop()
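# ---------------------------------------------------------------------------
# Minimal sketch of the ModelInterface key-value pattern described in the
# docstrings above. MiniModelInterface is illustrative, not the rlpytorch
# class itself; it only shows the name -> nn.Module lookup idea.
import torch.nn as nn

class MiniModelInterface:
    def __init__(self):
        self.models = {}              # name -> nn.Module

    def add_model(self, name, model):
        self.models[name] = model

    def __getitem__(self, name):
        return self.models[name]

mi = MiniModelInterface()
mi.add_model("actor", nn.Linear(4, 2))
mi["actor"].eval()                    # look the model up by name, set eval mode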
def main():
    print('Python version:', sys.version)
    print('PyTorch version:', torch.__version__)
    print('CUDA version:', torch.version.cuda)
    print('Conda env:', os.environ.get("CONDA_DEFAULT_ENV", ""))

    # Set game to online model.
    actors = ["actor_black", "actor_white"]
    additional_to_load = {
        ("eval_" + actor_name): (
            Evaluator.get_option_spec(name="eval_" + actor_name),
            lambda object_map, actor_name=actor_name: Evaluator(
                object_map, name="eval_" + actor_name,
                actor_name=actor_name, stats=None))
        for actor_name in actors
    }
    additional_to_load.update({
        ("mi_" + name): (ModelInterface.get_option_spec(), ModelInterface)
        for name in actors
    })
    env = load_env(
        os.environ, num_models=2, overrides={'actor_only': True},
        additional_to_load=additional_to_load)

    GC = env["game"].initialize()

    stats = [Stats(), Stats()]

    for actor_name, stat in zip(actors, stats):
        e = env["eval_" + actor_name]
        print(f'register {actor_name} for e = {e!s}')
        e.setup(sampler=env["sampler"], mi=env["mi_" + actor_name])

        def actor(batch, e, stat):
            reply = e.actor(batch)
            stat.feed(batch)
            return reply

        GC.reg_callback(
            actor_name,
            lambda batch, e=e, stat=stat: actor(batch, e, stat))

    root = os.environ.get("root", "./")
    print(f'Root: "{root}"')
    args = env["game"].options
    loop_end = False

    def game_start(batch):
        print("In game start")
        vers = [int(batch["black_ver"][0]), int(batch["white_ver"][0])]

        # Use the version number to load models.
        for model_loader, ver, actor_name in zip(
                env["model_loaders"], vers, actors):
            if ver >= 0:
                while True:
                    try:
                        reload(env["mi_" + actor_name], model_loader,
                               GC.params, args, root, ver, actor_name)
                        break
                    except BaseException:
                        import traceback
                        traceback.print_exc()
                        time.sleep(10)

    def game_end(batch):
        nonlocal loop_end
        wr = batch.GC.getClient().getGameStats().getWinRateStats()
        win_rate = (100.0 * wr.black_wins / wr.total_games
                    if wr.total_games > 0 else 0.0)
        print(f'{datetime.now()!s} B/W: {wr.black_wins}/{wr.white_wins}. '
              f'Black winrate: {win_rate:.2f} ({wr.total_games})')

        if args.suicide_after_n_games > 0 and \
                wr.total_games >= args.suicide_after_n_games:
            print(f'#suicide_after_n_games: {args.suicide_after_n_games}, '
                  f'total_games: {wr.total_games}')
            loop_end = True

    GC.reg_callback_if_exists("game_start", game_start)
    GC.reg_callback_if_exists("game_end", game_end)

    GC.start()

    if args.eval_model_pair:
        if args.eval_model_pair.find(",") >= 0:
            black, white = args.eval_model_pair.split(",")
        else:
            black = extract_ver(env["model_loaders"][0])
            white = extract_ver(env["model_loaders"][1])
            # Force them to reload in the future.
            for model_loader, actor_name in zip(env["model_loaders"], actors):
                reload_model(model_loader, GC.params,
                             env["mi_" + actor_name], actor_name, args)

        # We just use one thread to do selfplay.
        GC.GC.getClient().setRequest(
            int(black), int(white), env['game'].options.resign_thres, 1)

    for actor_name in actors:
        env["eval_" + actor_name].episode_start(0)

    while not loop_end:
        GC.run()

    GC.stop()
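# ---------------------------------------------------------------------------
# Note on the two win-rate formulas above: the american_checkers client
# divides by decided games (black_wins + white_wins), while this client
# divides by total_games, so draws lower the reported rate. A tiny check
# with made-up numbers:
black_wins, white_wins, draws = 6, 3, 1
total_games = black_wins + white_wins + draws

decided_rate = 100.0 * black_wins / (black_wins + white_wins)  # 66.67
overall_rate = 100.0 * black_wins / total_games                # 60.00
print(f"{decided_rate:.2f} vs {overall_rate:.2f}")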
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2017-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from datetime import datetime
import sys
import os

from rlpytorch import load_env, Evaluator, ArgsProvider, EvalIters

if __name__ == '__main__':
    evaluator = Evaluator(stats=False)
    eval_iters = EvalIters()

    env, args = load_env(
        os.environ, overrides=dict(actor_only=True),
        evaluator=evaluator, eval_iters=eval_iters)

    GC = env["game"].initialize()
    model = env["model_loaders"][0].load_model(GC.params)
    env["mi"].add_model("actor", model,
                        cuda=args.gpu is not None, gpu_id=args.gpu)
    env["mi"]["actor"].eval()

    def actor(batch):
        reply = evaluator.actor(batch)
        '''
        s = batch["s"][0][0]
        seq = batch["seq"][0][0]
        for i in range(s.size(0)):