def test_mock_load_env():
    """Re-enact rlpytorch.load_env() step by step with a mock command line
    and check that the override reaches the merged argument map.
    """
    init_for_test()
    from rlpytorch import (Trainer, SingleProcessRun, ArgsProvider,
                           ModelLoader, model_loader, Sampler, ModelInterface)

    envs = os.environ
    load_module = model_loader.load_module
    defaults = {}
    overrides = {}
    num_models = None
    kwargs = {}

    # Constructed exactly as load_env() would; kept even though unused here
    # in case construction has registration side effects.
    trainer = Trainer()
    runner = SingleProcessRun()

    game = load_module(envs["game"]).Loader()
    model_file = load_module(envs["model_file"])

    model_spec = model_file.Models[envs["model"]]
    if len(model_spec) == 2:
        model_class, method_class = model_spec
        sampler_class = Sampler
    else:
        model_class, method_class, sampler_class = model_spec

    defaults.update(getattr(model_file, "Defaults", dict()))
    overrides.update(getattr(model_file, "Overrides", dict()))

    method = method_class()
    sampler = sampler_class()
    mi = ModelInterface()

    # You might want multiple models loaded.
    if num_models is None:
        model_loaders = [ModelLoader(model_class)]
    else:
        model_loaders = [ModelLoader(model_class, model_idx=i)
                         for i in range(num_models)]

    env = dict(game=game, method=method, sampler=sampler,
               model_loaders=model_loaders, mi=mi)
    env.update(kwargs)

    parser = argparse.ArgumentParser()

    # Simulated command line.
    cmd_key = 'save_replay_prefix'
    cmd_v = '~/log/elf/'
    cmd_line = [f'--{cmd_key}', cmd_v]

    all_args = ArgsProvider.Load(parser, env,
                                 cmd_line=cmd_line,
                                 global_defaults=defaults,
                                 global_overrides=overrides)

    assert all_args[cmd_key] == cmd_v
    assert 'game' in env.keys()
def main():
    """Print interpreter/torch/CUDA info, assemble the training environment
    from os.environ, register game-context callbacks, and run the loop."""
    print(sys.version)
    print(torch.__version__)
    print(torch.version.cuda)
    print("Conda env: \"%s\"" % os.environ.get("CONDA_DEFAULT_ENV", ""))

    # Extra components load_env should construct from the option map.
    additional_to_load = {
        'trainer': (Trainer.get_option_spec(),
                    lambda option_map: Trainer(option_map)),
        'runner': (SingleProcessRun.get_option_spec(),
                   lambda option_map: SingleProcessRun(option_map)),
    }
    env = load_env(os.environ, additional_to_load=additional_to_load)

    trainer = env['trainer']
    runner = env['runner']

    GC = env["game"].initialize()

    first_loader = env["model_loaders"][0]
    model = first_loader.load_model(GC.params)
    env["mi"].add_model("model", model, opt=True)

    GC.reg_callback("train", trainer.train)

    # Only wire an actor model when the game context exposes that callback.
    if GC.reg_has_callback("actor"):
        args = env["game"].options
        env["mi"].add_model("actor", model, copy=True,
                            cuda=(args.gpu >= 0), gpu_id=args.gpu)
        GC.reg_callback("actor", trainer.actor)

    trainer.setup(sampler=env["sampler"], mi=env["mi"],
                  rl_method=env["method"])
    runner.setup(GC,
                 episode_summary=trainer.episode_summary,
                 episode_start=trainer.episode_start)
    runner.run()
def main():
    """Server-side training entry point for elfgames.checkers.

    Builds the trainer/runner from os.environ, registers the "train",
    "train_ctrl" and (optionally) "actor" callbacks on the C++ game
    context, and runs the training loop in a single process.
    """
    print('Python version:', sys.version)
    print('PyTorch version:', torch.__version__)
    print('CUDA version', torch.version.cuda)
    print('Conda env:', os.environ.get("CONDA_DEFAULT_ENV", ""))

    # Prefixed logger; the escape codes render the "|py|" tag in color.
    logger = logging.getIndexedLogger(
        '\u001b[31;1m|py|\u001b[0melfgames.checkers.train-', '')

    # Extra components for load_env to construct from the option map:
    # 'trainer' performs model updates, 'runner' drives the episode loop.
    additional_to_load = {
        'trainer': (Trainer.get_option_spec(),
                    lambda option_map: Trainer(option_map)),
        'runner': (SingleProcessRun.get_option_spec(),
                   lambda option_map: SingleProcessRun(option_map)),
    }
    env = load_env(os.environ, additional_to_load=additional_to_load)

    trainer = env['trainer']
    runner = env['runner']

    """
    Initializes keys('train', 'train_ctrl') for communication Python and
    C++ code, defined in Game.py and GameFeature.h. Also, initializes
    GameContext from C++ library wrapped by GC from python side + sets
    mode that parsed from options like play/selfplay/train/offline_train.
    """
    GC = env["game"].initialize()

    model_loader = env["model_loaders"][0]
    model = model_loader.load_model(GC.params)
    env["mi"].add_model("model", model, opt=True)

    keep_prev_selfplay = env["game"].options.keep_prev_selfplay

    model_ver = 0
    # Recover the model version from the checkpoint filename, if one was
    # given ("save-<ver>.bin" per the module-level `matcher` regex).
    model_filename = model_loader.options.load
    if isinstance(model_filename, str) and model_filename != "":
        realpath = os.path.realpath(model_filename)
        m = matcher.match(os.path.basename(realpath))
        if m:
            model_ver = int(m.group(1))

    eval_old_model = env["game"].options.eval_old_model

    if eval_old_model >= 0:
        GC.GC.getServer().setEvalMode(model_ver, eval_old_model)
    else:
        GC.GC.getServer().setInitialVersion(model_ver)

    checkers_selfplay_ver = model_ver
    root = os.environ["save"]
    print(f'Save models in\t\t: "{root}"')
    print(f'Keep prev_selfplay\t: {keep_prev_selfplay!s}')

    def train(batch, *args, **kwargs):
        # Train only on batches generated by the current selfplay version
        # (unless keep_prev_selfplay allows stale data).
        # Check whether the version match.
        if keep_prev_selfplay or \
                (batch["checkers_selfplay_ver"] != checkers_selfplay_ver).sum() == 0:
            trainer.train(batch, *args, **kwargs)
        else:
            print(f'Get batch whose selfplay ver is different from '
                  f'{checkers_selfplay_ver}, skipping')
            # Undo the episode-counter increment for the skipped batch.
            runner.inc_episode_counter(-1)

    def train_ctrl(batch, *args, **kwargs):
        # Advance to the selfplay version announced in the batch and
        # hot-swap the training model to the matching checkpoint.
        nonlocal checkers_selfplay_ver
        old_selfplay_ver = checkers_selfplay_ver
        checkers_selfplay_ver = int(batch["checkers_selfplay_ver"][0])
        logger.info(
            f'Train ctrl: checkers_selfplay_ver: {old_selfplay_ver} -> {checkers_selfplay_ver}'
        )
        # Wait until the clients have produced enough selfplay data.
        GC.GC.getServer().ServerWaitForSufficientSelfplay(
            checkers_selfplay_ver)
        # Reload old models.
        real_path = os.path.join(root, "save-" + str(checkers_selfplay_ver) + ".bin")
        model_loader.options.load = real_path
        # Retry until the checkpoint file becomes readable.
        while True:
            try:
                model = model_loader.load_model(GC.params)
                break
            except BaseException:
                time.sleep(10)
        env["mi"].remove_model("model")
        env["mi"].add_model("model", model, opt=True)
        trainer.episode_reset()
        runner.set_episode_counter(-1)

    GC.reg_callback("train", train)
    GC.reg_callback("train_ctrl", train_ctrl)
    if GC.reg_has_callback("actor"):
        args = env["game"].options
        env["mi"].add_model("actor", model, copy=True,
                            cuda=(args.gpu >= 0), gpu_id=args.gpu)
        GC.reg_callback("actor", trainer.actor)

    trainer.setup(sampler=env["sampler"], mi=env["mi"],
                  rl_method=env["method"])

    def episode_summary(i):
        nonlocal checkers_selfplay_ver
        logger.info("Episode_summary")
        ver = trainer.episode_summary(i)
        # This might block (when evaluation does not catch up with training).
        GC.GC.getServer().notifyNewVersion(checkers_selfplay_ver, ver)

    offline_training = (env["game"].options.mode == "offline_train")

    def after_start():
        logger.info("after_start")
        nonlocal checkers_selfplay_ver
        if not offline_training:
            GC.GC.getServer().ServerWaitForSufficientSelfplay(
                checkers_selfplay_ver)

    runner.setup(GC, after_start=after_start,
                 episode_summary=episode_summary,
                 episode_start=trainer.episode_start)
    # NOTE(review): 'run_singe_process' looks like a typo for
    # 'run_single_process', but it must match the runner's actual API name
    # -- confirm against SingleProcessRun before renaming.
    runner.run_singe_process()
# LICENSE file in the root directory of this source tree. #!/usr/bin/env python # -*- coding: utf-8 -*- import argparse from datetime import datetime import sys import os from rlpytorch import LSTMTrainer, Sampler, SingleProcessRun, load_env, ModelLoader, ArgsProvider, ModelInterface if __name__ == '__main__': trainer = LSTMTrainer() runner = SingleProcessRun() env, all_args = load_env(os.environ, trainer=trainer, runner=runner) GC = env["game"].initialize() model = env["model_loaders"][0].load_model(GC.params) mi = ModelInterface() mi.add_model("model", model, optim_params={ "lr" : 0.001}) mi.add_model("actor", model, copy=True, cuda=all_args.gpu is not None, gpu_id=all_args.gpu) trainer.setup(sampler=env["sampler"], mi=mi, rl_method=env["method"]) GC.reg_callback("train", trainer.train) GC.reg_callback("actor", trainer.actor) runner.setup(GC, episode_summary=trainer.episode_summary, episode_start=trainer.episode_start)
# Check whether the actions remains the same. if t < T - 1: key = (id, sel["seq"][t][i], sel["game_counter"][t][i]) recorded_a = self.idgseq2action[key] actual_a = sel["a"][t][i] if recorded_a != actual_a: self._debug( "%s Action was different. recorded %d, actual %d" % (prompt, recorded_a, actual_a)) # Overlapped by 1. self.id2seqs_train[id] = last_seq - 1 if __name__ == '__main__': collector = StatsCollector() runner = SingleProcessRun() env, all_args = load_env(os.environ, collector=collector, runner=runner) GC = env["game"].initialize() # GC.setup_gpu(0) collector.set_params(GC.params) GC.reg_callback("actor", collector.actor) GC.reg_callback("train", collector.train) GC.reg_sig_int() runner.setup(GC) runner.run()
import os
import re
import time

from rlpytorch import load_env, SingleProcessRun, Trainer

# Matches checkpoint filenames such as "save-123.bin"; group(1) is the model
# version.  The '.' is escaped so only a literal dot matches (the previous
# pattern's bare '.' also accepted names like "save-123xbin").
matcher = re.compile(r"save-(\d+)\.bin")

if __name__ == '__main__':
    # Extra components for load_env to construct from the option map.
    additional_to_load = {
        'trainer': (
            Trainer.get_option_spec(),
            lambda option_map: Trainer(option_map)),
        'runner': (
            SingleProcessRun.get_option_spec(),
            lambda option_map: SingleProcessRun(option_map)),
    }
    env = load_env(os.environ, additional_to_load=additional_to_load)

    trainer = env['trainer']
    runner = env['runner']

    GC = env["game"].initialize()

    model_loader = env["model_loaders"][0]
    model = model_loader.load_model(GC.params)
    env["mi"].add_model("model", model, opt=True)

    keep_prev_selfplay = env["game"].options.keep_prev_selfplay
# LICENSE file in the root directory of this source tree. #!/usr/bin/env python # -*- coding: utf-8 -*- import argparse from datetime import datetime import sys import os from rlpytorch import LSTMTrainer, Sampler, SingleProcessRun, load_env, ModelLoader, ArgsProvider, ModelInterface if __name__ == '__main__': trainer = LSTMTrainer() runner = SingleProcessRun() env, all_args = load_env(os.environ, trainer=trainer, runner=runner) GC = env["game"].initialize() model = env["model_loaders"][0].load_model(GC.params) mi = ModelInterface() mi.add_model("model", model, optim_params={"lr": 0.001}) mi.add_model("actor", model, copy=True, cuda=all_args.gpu is not None, gpu_id=all_args.gpu) trainer.setup(sampler=env["sampler"], mi=mi, rl_method=env["method"])
matcher = re.compile(r"save-(\d+).bin") if __name__ == '__main__': additional_to_load = { 'trainer0': ( Trainer.get_option_spec(), lambda option_map: Trainer(option_map)), 'trainer1': ( Trainer.get_option_spec(), lambda option_map: Trainer(option_map)), 'mi0': ( ModelInterface.get_option_spec(), ModelInterface), 'mi1': ( ModelInterface.get_option_spec(), ModelInterface), 'runner': ( SingleProcessRun.get_option_spec(), lambda option_map: SingleProcessRun(option_map)), } env = load_env(os.environ, num_models=2, additional_to_load=additional_to_load, overrides=dict(backprop0=False, backprop1=False, mode="offline_train")) trainer0 = env['trainer0'] trainer1 = env['trainer1'] runner = env['runner'] GC = env["game"].initialize() for i in range(2):
def main():
    """Server-side training entry point.

    Builds the trainer/runner from os.environ, registers the "train",
    "train_ctrl" and (optionally) "actor" callbacks on the game context,
    and runs the training loop in a single process.
    """
    print('Python version:', sys.version)
    print('PyTorch version:', torch.__version__)
    print('CUDA version', torch.version.cuda)
    print('Conda env:', os.environ.get("CONDA_DEFAULT_ENV", ""))

    # Extra components for load_env to construct from the option map:
    # 'trainer' performs model updates, 'runner' drives the episode loop.
    additional_to_load = {
        'trainer': (Trainer.get_option_spec(),
                    lambda option_map: Trainer(option_map)),
        'runner': (SingleProcessRun.get_option_spec(),
                   lambda option_map: SingleProcessRun(option_map)),
    }
    env = load_env(os.environ, additional_to_load=additional_to_load)

    trainer = env['trainer']
    runner = env['runner']

    GC = env["game"].initialize()

    model_loader = env["model_loaders"][0]
    model = model_loader.load_model(GC.params)
    env["mi"].add_model("model", model, opt=True)

    keep_prev_selfplay = env["game"].options.keep_prev_selfplay

    model_ver = 0
    # Recover the model version from the checkpoint filename, if one was
    # given ("save-<ver>.bin" per the module-level `matcher` regex).
    model_filename = model_loader.options.load
    if isinstance(model_filename, str) and model_filename != "":
        realpath = os.path.realpath(model_filename)
        m = matcher.match(os.path.basename(realpath))
        if m:
            model_ver = int(m.group(1))

    eval_old_model = env["game"].options.eval_old_model
    if eval_old_model >= 0:
        GC.GC.getServer().setEvalMode(model_ver, eval_old_model)
    else:
        GC.GC.getServer().setInitialVersion(model_ver)

    selfplay_ver = model_ver
    root = os.environ["save"]
    print(f'Root: "{root}"')
    print(f'Keep prev_selfplay: {keep_prev_selfplay!s}')

    def train(batch, *args, **kwargs):
        # Train only on batches generated by the current selfplay version
        # (unless keep_prev_selfplay allows stale data).
        # Check whether the version match.
        if keep_prev_selfplay or \
                (batch["selfplay_ver"] != selfplay_ver).sum() == 0:
            trainer.train(batch, *args, **kwargs)
        else:
            print(f'Get batch whose selfplay ver is different from '
                  f'{selfplay_ver}, skipping')
            # Undo the episode-counter increment for the skipped batch.
            runner.inc_episode_counter(-1)

    def train_ctrl(batch, *args, **kwargs):
        # Advance to the selfplay version announced in the batch and
        # hot-swap the training model to the matching checkpoint.
        nonlocal selfplay_ver
        old_selfplay_ver = selfplay_ver
        selfplay_ver = int(batch["selfplay_ver"][0])
        print(
            f'Train ctrl: selfplay_ver: {old_selfplay_ver} -> {selfplay_ver}')

        GC.GC.getServer().waitForSufficientSelfplay(selfplay_ver)

        # Reload old models.
        real_path = os.path.join(root, "save-" + str(selfplay_ver) + ".bin")
        model_loader.options.load = real_path
        # Retry until the checkpoint file becomes readable.
        while True:
            try:
                model = model_loader.load_model(GC.params)
                break
            except BaseException:
                time.sleep(10)
        env["mi"].remove_model("model")
        env["mi"].add_model("model", model, opt=True)
        trainer.episode_reset()
        runner.set_episode_counter(-1)

    GC.reg_callback("train", train)
    GC.reg_callback("train_ctrl", train_ctrl)
    if GC.reg_has_callback("actor"):
        args = env["game"].options
        env["mi"].add_model("actor", model, copy=True,
                            cuda=(args.gpu >= 0), gpu_id=args.gpu)
        GC.reg_callback("actor", trainer.actor)

    trainer.setup(sampler=env["sampler"], mi=env["mi"],
                  rl_method=env["method"])

    def episode_summary(i):
        nonlocal selfplay_ver
        ver = trainer.episode_summary(i)
        # This might block (when evaluation does not catch up with training).
        GC.GC.getServer().notifyNewVersion(selfplay_ver, ver)

    offline_training = (env["game"].options.mode == "offline_train")

    def after_start():
        nonlocal selfplay_ver
        if not offline_training:
            print("About to wait for sufficient selfplay")
            GC.GC.getServer().waitForSufficientSelfplay(selfplay_ver)

    runner.setup(GC, after_start=after_start,
                 episode_summary=episode_summary,
                 episode_start=trainer.episode_start)
    runner.run()
if sel["seq"][t][i] != last_seq + 1: self._debug("%s. Invalid next seq. seq should be %d" % (prompt, last_seq + 1)) last_seq += 1 # Check whether the actions remains the same. if t < T - 1: key = (id, sel["seq"][t][i], sel["game_counter"][t][i]) recorded_a = self.idgseq2action[key] actual_a = sel["a"][t][i] if recorded_a != actual_a: self._debug("%s Action was different. recorded %d, actual %d" % (prompt, recorded_a, actual_a)) # Overlapped by 1. self.id2seqs_train[id] = last_seq - 1 if __name__ == '__main__': collector = StatsCollector() runner = SingleProcessRun() env, all_args = load_env(os.environ, collector=collector, runner=runner) GC = env["game"].initialize() # GC.setup_gpu(0) collector.set_params(GC.params) GC.reg_callback("actor", collector.actor) GC.reg_callback("train", collector.train) GC.reg_sig_int() runner.setup(GC) runner.run()