def get_option_spec(cls):
    spec = PyOptionSpec()
    spec.addBoolOption(
        'backprop',
        'Whether to backprop the total loss',
        True)
    return spec
def get_option_spec(cls):
    spec = PyOptionSpec()
    spec.addIntOption('num_minibatch', 'number of minibatches', 5000)
    spec.addIntOption('num_episode', 'number of episodes', 10000)
    spec.addIntOption('num_process', 'number of processes', 2)
    spec.addBoolOption('tqdm', 'toggle tqdm visualization', False)
    return spec
def get_option_spec(cls):
    spec = PyOptionSpec()
    spec.addBoolOption('bn', 'toggles batch norm', True)
    spec.addBoolOption('leaky_relu', 'toggles leaky ReLU', True)
    spec.addIntOption('num_layer', 'number of layers', 39)
    spec.addIntOption('dim', 'model dimension', 128)
    return spec
def get_option_spec(cls):
    spec = PyOptionSpec()
    spec.addStrListOption(
        'additional_labels',
        'add additional labels in the batch; e.g. id, seq, last_terminal',
        [])
    return spec
def get_option_spec(cls):
    spec = PyOptionSpec()
    spec.addFloatOption(
        'discount',
        'exponential discount rate',
        0.99)
    return spec
def get_option_spec(cls, stats_name=''):
    spec = PyOptionSpec()
    spec.addStrOption(
        stats_name + '_stats',
        'type of stat to report (rewards or winrate)',
        '')
    return spec
def get_option_spec(cls):
    spec = PyOptionSpec()
    spec.addBoolOption(
        'backprop',
        'Whether to backprop the total loss',
        True)
    return spec
def get_option_spec(cls):
    spec = PyOptionSpec()
    spec.addBoolOption('bn', 'toggles batch norm', True)
    spec.addBoolOption('leaky_relu', 'toggles leaky ReLU', False)
    spec.addFloatOption('bn_momentum', 'batch norm momentum (pytorch style)', 0.1)
    spec.addIntOption('dim', 'model dimension', 128)
    return spec
def get_option_spec(cls):
    spec = PyOptionSpec()
    spec.addStrOption(
        'loglevel',
        ('Global log level. Choose from '
         'trace, debug, info, warning, error, critical, or off'),
        'info')
    return spec
def get_option_spec(cls, name='eval'):
    spec = PyOptionSpec()
    spec.addStrListOption('keys_in_reply', 'keys in reply', [])
    spec.addIntOption('num_minibatch', 'number of minibatches', 5000)
    spec.addStrListOption('parsed_args', 'dummy option', [])
    spec.merge(Stats.get_option_spec(name))
    return spec
def get_option_spec(cls):
    spec = PyOptionSpec()
    spec.addIntOption(
        'num_block',
        'number of resnet blocks',
        20)
    spec.merge(Block.get_option_spec())
    return spec
def get_option_spec(cls):
    spec = PyOptionSpec()
    spec.addIntOption('num_minibatch', 'number of minibatches', 5000)
    spec.addIntOption(
        'num_cooldown',
        'Last #minibatches to refresh running mean/std for batchnorm '
        'in addition to the training stage',
        0)
    spec.addIntOption('num_episode', 'number of episodes', 10000)
    spec.addBoolOption('tqdm', 'toggle tqdm visualization', False)
    return spec
def get_option_spec(cls):
    spec = PyOptionSpec()
    spec.addIntOption(
        'num_eval',
        'number of games to evaluate',
        500)
    spec.addBoolOption(
        'tqdm',
        'toggle tqdm visualization',
        False)
    return spec
def get_option_spec(cls):
    spec = PyOptionSpec()
    spec.addFloatOption(
        'grad_clip_norm',
        'gradient norm clipping',
        0.0)
    spec.addStrOption(
        'value_node',
        'name of the value node',
        'V')
    return spec
def get_option_spec(cls, name='eval'):
    spec = PyOptionSpec()
    spec.addStrListOption('keys_in_reply', 'keys in reply', [])
    spec.addIntOption('num_minibatch', 'number of minibatches', 5000)
    spec.addStrListOption('parsed_args', 'dummy option', '')
    spec.merge(Stats.get_option_spec(name))
    return spec
def get_option_spec(cls):
    spec = PyOptionSpec()
    spec.addStrOption(
        'sample_policy',
        'choices of epsilon-greedy, multinomial, or uniform',
        'epsilon-greedy')
    spec.addBoolOption(
        'store_greedy',
        ('if enabled, picks maximum-probability action; '
         'otherwise, sample from distribution'),
        False)
    spec.addFloatOption('epsilon', 'used in epsilon-greedy', 0.0)
    spec.addStrListOption('sample_nodes', 'nodes to be sampled and saved', ['pi,a'])
    return spec
def get_option_spec(cls):
    spec = PyOptionSpec()
    spec.addBoolOption('bn', 'toggles batch norm', True)
    spec.addBoolOption('leaky_relu', 'toggles leaky ReLU', False)
    spec.addIntOption('gpu', 'which gpu to use', -1)
    return spec
def get_option_spec(cls):
    spec = PyOptionSpec()
    spec.addStrOption('a_node', 'action node', 'a')
    spec.addStrOption('q_node', 'Q node', 'Q')
    spec.merge(DiscountedReward.get_option_spec())
    return spec
def get_option_spec(cls):
    spec = PyOptionSpec()
    spec.addStrOption('value_node', 'name of the value node', 'V')
    spec.addFloatOption(
        'adv_clip',
        'clip value of advantage. 0.0 means no clipping',
        0.0)
    spec.merge(PyOptionSpec.fromClasses(
        (PolicyGradient, DiscountedReward, ValueMatcher)))
    return spec
def get_option_spec(cls):
    spec = PyOptionSpec()
    elf_C.setSpecELFOptions(spec.getOptionSpec())
    test.setSpecTSOptions(spec.getOptionSpec())
    spec.addIntOption(
        'gpu',
        'GPU id to use',
        0)
    spec.addStrOption(
        'load',
        'Load old model',
        "")
    spec.addStrListOption(
        'parsed_args',
        'dummy option',
        [])
    return spec
def get_option_spec(cls):
    spec = PyOptionSpec()
    spec.addStrOption('value_node', 'name of the value node', 'V')
    spec.merge(PyOptionSpec.fromClasses(
        (PolicyGradient, DiscountedReward, ValueMatcher)))
    return spec
def get_option_spec(cls, name='eval'):
    spec = PyOptionSpec()
    spec.addStrListOption(
        'keys_in_reply',
        'keys in reply',
        [])
    spec.addIntOption(
        'num_minibatch',
        'number of minibatches',
        5000)
    spec.addStrListOption(
        'parsed_args',
        'dummy option',
        '')
    spec.merge(Stats.get_option_spec(name))
    return spec
def get_option_spec(cls):
    spec = PyOptionSpec()
    spec.addBoolOption(
        'bn',
        'toggles batch norm',
        True)
    spec.addBoolOption(
        'leaky_relu',
        'toggles leaky ReLU',
        False)
    spec.addFloatOption(
        'bn_momentum',
        'batch norm momentum (pytorch style)',
        0.1)
    spec.addIntOption(
        'dim',
        'model dimension',
        128)
    return spec
def get_option_spec(cls):
    spec = PyOptionSpec()
    spec.addBoolOption(
        'bn',
        'toggles batch norm',
        True)
    spec.addBoolOption(
        'leaky_relu',
        'toggles leaky ReLU',
        True)
    spec.addIntOption(
        'num_block',
        'number of blocks',
        20)
    spec.addIntOption(
        'dim',
        'model dimension',
        128)
    return spec
def get_option_spec(cls):
    spec = PyOptionSpec()
    spec.addIntOption(
        'num_minibatch',
        'number of minibatches',
        5000)
    spec.addIntOption(
        'num_episode',
        'number of episodes',
        10000)
    spec.addIntOption(
        'num_process',
        'number of processes',
        2)
    spec.addBoolOption(
        'tqdm',
        'toggle tqdm visualization',
        False)
    return spec
def get_option_spec(cls):
    spec = PyOptionSpec()
    spec.addStrOption(
        'sample_policy',
        'choices of epsilon-greedy, multinomial, or uniform',
        'epsilon-greedy')
    spec.addBoolOption(
        'store_greedy',
        ('if enabled, picks maximum-probability action; '
         'otherwise, sample from distribution'),
        False)
    spec.addFloatOption(
        'epsilon',
        'used in epsilon-greedy',
        0.0)
    spec.addStrListOption(
        'sample_nodes',
        'nodes to be sampled and saved',
        ['pi,a'])
    return spec
def get_option_spec(cls):
    spec = PyOptionSpec()
    spec.addIntOption(
        'num_minibatch',
        'number of minibatches',
        5000)
    spec.addIntOption(
        'num_cooldown',
        'Last #minibatches to refresh running mean/std for batchnorm '
        'in addition to the training stage',
        0)
    spec.addIntOption(
        'num_episode',
        'number of episodes',
        10000)
    spec.addBoolOption(
        'tqdm',
        'toggle tqdm visualization',
        False)
    return spec
def get_option_spec(cls):
    spec = PyOptionSpec()
    spec.addStrOption(
        'a_node',
        'action node',
        'a')
    spec.addStrOption(
        'q_node',
        'Q node',
        'Q')
    spec.merge(DiscountedReward.get_option_spec())
    return spec
def get_option_spec(cls):
    spec = PyOptionSpec()
    spec.addStrOption(
        'value_node',
        'name of the value node',
        'V')
    spec.merge(PyOptionSpec.fromClasses(
        (PolicyGradient, DiscountedReward, ValueMatcher)
    ))
    return spec
def get_option_spec(cls):
    spec = PyOptionSpec()
    elf.saveDefaultOptionsToArgs("", spec)
    elf.saveDefaultNetOptionsToArgs("", spec)
    spec.addIntOption(
        'gpu',
        'GPU id to use',
        -1)
    spec.addStrListOption(
        "parsed_args",
        "dummy option",
        [])
    return spec
def main():
    print(sys.version)
    print(torch.__version__)
    print(torch.version.cuda)
    print("Conda env: \"%s\"" % os.environ.get("CONDA_DEFAULT_ENV", ""))

    option_spec = PyOptionSpec()
    option_spec.merge(PyOptionSpec.fromClasses((RunGC,)))
    option_map = option_spec.parse()

    rungc = RunGC(option_map)
    rungc.initialize()

    num_batch = 10000000
    rungc.wrapper.start()
    for i in range(num_batch):
        rungc.wrapper.run()
    rungc.wrapper.stop()
def get_option_spec(cls):
    spec = PyOptionSpec()
    spec.addFloatOption('discount', 'exponential discount rate', 0.99)
    return spec
def on_batch(self, batch):
    # Run the model on the incoming batch and rescale the value head.
    res = self.model(batch)
    res["V"].data /= 10.0
    return dict(V=res["V"].data, pi=res["pi"].data)


if __name__ == '__main__':
    option_spec = PyOptionSpec()
    option_spec.merge(PyOptionSpec.fromClasses((RunGC,)))
    option_map = option_spec.parse()

    rungc = RunGC(option_map)
    rungc.initialize()

    num_batch = 10000000
    start = time.perf_counter()
    rungc.wrapper.start()
    for i in range(num_batch):
        rungc.wrapper.run()
    elapsed = time.perf_counter() - start
def get_option_spec(cls):
    spec = PyOptionSpec()
    spec.addStrOption(
        'record_dir',
        'directory to record in',
        './record')
    spec.addStrOption(
        'save_prefix',
        'prefix of savefiles',
        'save')
    spec.addStrOption(
        'save_dir',
        'directory for savefiles',
        os.environ.get('save', './'))
    spec.addStrOption(
        'latest_symlink',
        'name for latest model symlink',
        'latest')
    spec.addIntOption(
        'num_games',
        'number of games',
        1024)
    spec.addIntOption(
        'batchsize',
        'batch size',
        128)
    return spec
def get_option_spec(cls, stats_name=''):
    spec = PyOptionSpec()
    spec.addStrOption(
        stats_name + '_stats',
        'type of stat to report (rewards or winrate)',
        '')
    return spec
def get_option_spec(cls):
    spec = PyOptionSpec()
    tutorial.getPredefined(spec.getOptionSpec())
    spec.addIntOption(
        'client_dummy',
        'Some dummy arguments',
        -1)
    spec.addStrOption(
        'client_dummy2',
        'some string dummy arguments',
        '')
    spec.addBoolOption(
        "client_dummy3",
        "Some boolean dummy arguments",
        True)
    spec.addIntOption(
        'gpu',
        'GPU id to use',
        -1)
    return spec
def get_option_spec(cls):
    spec = PyOptionSpec()
    spec.addBoolOption('bn', 'toggles batch norm', True)
    spec.addBoolOption('leaky_relu', 'toggles leaky ReLU', False)
    spec.addFloatOption('bn_momentum', 'batch norm momentum (pytorch style)', 0.1)
    spec.addIntOption('num_block', 'number of blocks', 20)
    spec.addIntOption('dim', 'model dimension', 128)
    spec.addBoolOption('use_data_parallel', 'TODO: fill this in', False)
    spec.addBoolOption('use_data_parallel_distributed', 'TODO: fill this in', False)
    spec.addIntOption('dist_rank', 'TODO: fill this in', -1)
    spec.addIntOption('dist_world_size', 'TODO: fill this in', -1)
    spec.addStrOption('dist_url', 'TODO: fill this in', '')
    spec.addIntOption('gpu', 'which gpu to use', -1)
    spec.merge(GoResNet.get_option_spec())
    return spec
def get_option_spec(cls):
    spec = PyOptionSpec()
    spec.addFloatOption('entropy_ratio', 'the entropy ratio we put on PolicyGradient', 0.01)
    spec.addFloatOption('grad_clip_norm', 'gradient norm clipping', 0.0)
    spec.addFloatOption('min_prob', 'minimal probability used in training', 1e-6)
    spec.addFloatOption('ratio_clamp', 'maximum importance sampling ratio', 10.0)
    spec.addStrListOption('policy_action_nodes', 'pairs of policy/action nodes, e.g. pi,a', ['pi,a'])
    return spec
def get_option_spec(cls):
    spec = PyOptionSpec()
    spec.addBoolOption(
        'bn',
        'toggles batch norm',
        True)
    spec.addBoolOption(
        'leaky_relu',
        'toggles leaky ReLU',
        False)
    spec.addFloatOption(
        'bn_momentum',
        'batch norm momentum (pytorch style)',
        0.1)
    spec.addIntOption(
        'num_block',
        'number of blocks',
        20)
    spec.addIntOption(
        'dim',
        'model dimension',
        128)
    spec.addBoolOption(
        'use_data_parallel',
        'TODO: fill this in',
        False)
    spec.addBoolOption(
        'use_data_parallel_distributed',
        'TODO: fill this in',
        False)
    spec.addIntOption(
        'dist_rank',
        'TODO: fill this in',
        -1)
    spec.addIntOption(
        'dist_world_size',
        'TODO: fill this in',
        -1)
    spec.addStrOption(
        'dist_url',
        'TODO: fill this in',
        '')
    spec.addIntOption(
        'gpu',
        'which gpu to use',
        -1)
    spec.merge(GoResNet.get_option_spec())
    return spec
def get_option_spec(cls):
    spec = PyOptionSpec()
    spec.addStrOption(
        'preload_sgf',
        'TODO: fill this help message in',
        '')
    spec.addIntOption(
        'preload_sgf_move_to',
        'TODO: fill this help message in',
        -1)
    spec.addBoolOption(
        'actor_only',
        'TODO: fill this help message in',
        False)
    spec.addStrListOption(
        'list_files',
        'Provide a list of json files for offline training',
        [])
    spec.addIntOption(
        'port',
        'TODO: fill this help message in',
        5556)
    spec.addStrOption(
        'server_addr',
        'TODO: fill this help message in',
        '')
    spec.addStrOption(
        'server_id',
        'TODO: fill this help message in',
        '')
    spec.addIntOption(
        'q_min_size',
        'TODO: fill this help message in',
        10)
    spec.addIntOption(
        'q_max_size',
        'TODO: fill this help message in',
        1000)
    spec.addIntOption(
        'num_reader',
        'TODO: fill this help message in',
        50)
    spec.addIntOption(
        'num_reset_ranking',
        'TODO: fill this help message in',
        5000)
    spec.addIntOption(
        'client_max_delay_sec',
        'Maximum amount of allowed delays in sec. If the client '
        'didn\'t respond after that, we think it is dead.',
        1200)
    spec.addBoolOption(
        'verbose',
        'TODO: fill this help message in',
        False)
    spec.addBoolOption(
        'keep_prev_selfplay',
        'TODO: fill this help message in',
        False)
    spec.addBoolOption(
        'print_result',
        'TODO: fill this help message in',
        False)
    spec.addIntOption(
        'data_aug',
        'specify data augmentation, 0-7, -1 means random',
        -1)
    spec.addIntOption(
        'ratio_pre_moves',
        ('how many moves to perform in each thread, before we use the '
         'data to train the model'),
        0)
    spec.addFloatOption(
        'start_ratio_pre_moves',
        ('how many moves to perform in each thread, before we use the '
         'first sgf file to train the model'),
        0.5)
    spec.addIntOption(
        'num_games_per_thread',
        ('For offline mode, it is the number of concurrent games per '
         'thread, used to increase diversity of games; for selfplay mode, '
         'it is the number of games played at each thread, and after that '
         'we need to call restartAllGames() to resume.'),
        -1)
    spec.addIntOption(
        'expected_num_clients',
        'Expected number of clients',
        -1)
    spec.addIntOption(
        'num_future_actions',
        'TODO: fill this help message in',
        1)
    spec.addIntOption(
        'move_cutoff',
        'Cutoff ply in replay',
        -1)
    spec.addStrOption(
        'mode',
        'TODO: fill this help message in',
        'online')
    spec.addBoolOption(
        'black_use_policy_network_only',
        'TODO: fill this help message in',
        False)
    spec.addBoolOption(
        'white_use_policy_network_only',
        'TODO: fill this help message in',
        False)
    spec.addIntOption(
        'ply_pass_enabled',
        'TODO: fill this help message in',
        0)
    spec.addBoolOption(
        'use_mcts',
        'TODO: fill this help message in',
        False)
    spec.addBoolOption(
        'use_mcts_ai2',
        'TODO: fill this help message in',
        False)
    spec.addFloatOption(
        'white_puct',
        'PUCT for white when it is > 0.0. If it is -1 then we use '
        'the same puct for both sides (specified by mcts_options). '
        'A HACK to use different puct for different models. Should '
        'be replaced by a more systematic approach.',
        -1.0)
    spec.addIntOption(
        'white_mcts_rollout_per_batch',
        'white mcts rollout per batch',
        -1)
    spec.addIntOption(
        'white_mcts_rollout_per_thread',
        'white mcts rollout per thread',
        -1)
    spec.addBoolOption(
        'use_df_feature',
        'TODO: fill this help message in',
        False)
    spec.addStrOption(
        'dump_record_prefix',
        'TODO: fill this help message in',
        '')
    spec.addIntOption(
        'policy_distri_cutoff',
        'TODO: fill this help message in',
        0)
    spec.addFloatOption(
        'resign_thres',
        'TODO: fill this help message in',
        0.0)
    spec.addBoolOption(
        'following_pass',
        'TODO: fill this help message in',
        False)
    spec.addIntOption(
        'selfplay_timeout_usec',
        'TODO: fill this help message in',
        0)
    spec.addIntOption(
        'gpu',
        'TODO: fill this help message in',
        -1)
    spec.addBoolOption(
        'policy_distri_training_for_all',
        'TODO: fill this help message in',
        False)
    spec.addBoolOption(
        'parameter_print',
        'TODO: fill this help message in',
        True)
    spec.addIntOption(
        'batchsize',
        'batch size',
        128)
    spec.addIntOption(
        'batchsize2',
        'batch size',
        -1)
    spec.addIntOption(
        'T',
        'number of timesteps',
        6)
    spec.addIntOption(
        'selfplay_init_num',
        ('Initial number of selfplay games to generate before training a '
         'new model'),
        2000)
    spec.addIntOption(
        'selfplay_update_num',
        ('Additional number of selfplay games to generate after a model '
         'is updated'),
        1000)
    spec.addBoolOption(
        'selfplay_async',
        'Whether to use async mode in selfplay',
        False)
    spec.addIntOption(
        'eval_num_games',
        ('number of evaluations to be performed to decide whether a model '
         'is better than the other'),
        400)
    spec.addFloatOption(
        'eval_winrate_thres',
        'Win rate threshold for evaluation',
        0.55)
    spec.addIntOption(
        'eval_old_model',
        ('If specified, then we directly switch to evaluation mode '
         'between the loaded model and the old model specified by this '
         'switch'),
        -1)
    spec.addStrOption(
        'eval_model_pair',
        ('If specified for df_selfplay.py, then the two models will be '
         'evaluated on this client'),
        '')
    spec.addStrOption(
        'comment',
        'Comment for this run',
        '')
    spec.addBoolOption(
        'cheat_eval_new_model_wins_half',
        'When enabled, in evaluation mode, when the game '
        'finishes, the player with the most recent model gets 100% '
        'win rate half of the time. '
        'This is used to test the framework',
        False)
    spec.addBoolOption(
        'cheat_selfplay_random_result',
        'When enabled, in selfplay mode the result of the game is random. '
        'This is used to test the framework',
        False)
    spec.addIntOption(
        'suicide_after_n_games',
        'return after n games have finished, -1 means it never ends',
        -1)
    spec.merge(PyOptionSpec.fromClasses((ContextArgs, MoreLabels)))
    return spec
def get_option_spec(cls):
    spec = PyOptionSpec()
    spec.addStrOption(
        'opt_method',
        'optimization method (adam or sgd)',
        'adam')
    spec.addFloatOption(
        'lr',
        'learning rate',
        1e-3)
    spec.addFloatOption(
        'adam_eps',
        'Adam epsilon',
        1e-3)
    spec.addFloatOption(
        'momentum',
        'momentum parameter',
        0.9)
    spec.addFloatOption(
        'weight_decay',
        'weight decay rate',
        0.0)
    return spec
def get_option_spec(cls):
    spec = PyOptionSpec()
    spec.addStrOption('record_dir', 'directory to record in', './record')
    spec.addStrOption('save_prefix', 'prefix of savefiles', 'save')
    spec.addStrOption('save_dir', 'directory for savefiles',
                      os.environ.get('save', './'))
    spec.addStrOption('latest_symlink', 'name for latest model symlink', 'latest')
    spec.addIntOption('num_games', 'number of games', 1024)
    spec.addIntOption('batchsize', 'batch size', 128)
    return spec
def get_option_spec(cls):
    spec = PyOptionSpec()
    spec.addFloatOption(
        'entropy_ratio',
        'the entropy ratio we put on PolicyGradient',
        0.01)
    spec.addFloatOption(
        'grad_clip_norm',
        'gradient norm clipping',
        0.0)
    spec.addFloatOption(
        'min_prob',
        'minimal probability used in training',
        1e-6)
    spec.addFloatOption(
        'ratio_clamp',
        'maximum importance sampling ratio',
        10.0)
    spec.addStrListOption(
        'policy_action_nodes',
        'pairs of policy/action nodes, e.g. pi,a',
        ['pi,a'])
    return spec
def load_env(
        envs,
        num_models=None,
        overrides=None,
        additional_to_load=None):
    """Load envs.

    Envs will be specified as environment variables. Specifically, the
    environment variables ``game``, ``model_file`` and ``model`` are required.

    ``additional_to_load`` is a dict with the following format:

        {'variable_name': (option_spec, callable)}

    For each element in ``additional_to_load``, ``load_env`` will parse the
    ``option_spec``, pass the resulting option map to ``callable``, and store
    the result of ``callable`` in the return value (under the key
    ``variable_name``).

    Returns:
        env: dict of
            ``game``: game module
            ``method``: learning method used
            ``model_loaders``: loaders for model
    """
    logger = logging.getIndexedLogger(
        '\u001b[31;1m|py|\u001b[0mrlpytorch.model_loader.load_env', '')
    logger.info('Loading env')

    game_loader_class = load_module(envs["game"]).Loader
    logger.info(
        f'\u001b[32;1mModule game successfully loaded:\u001b[0m {envs["game"]}')

    model_file = load_module(envs["model_file"])
    logger.info(
        f'\u001b[32;1mModule model_file successfully loaded:\u001b[0m '
        f'{envs["model_file"]}')

    # TODO This is not good, need to fix.
    if len(model_file.Models[envs["model"]]) == 2:
        model_class, method_class = model_file.Models[envs["model"]]
        sampler_class = Sampler
    else:
        model_class, method_class, sampler_class = \
            model_file.Models[envs["model"]]

    overrides = dict(overrides) if overrides else {}
    overrides.update(getattr(model_file, "Overrides", {}))

    option_spec = PyOptionSpec()
    option_spec.merge(PyOptionSpec.fromClasses((
        logging.GlobalLoggingConfigurator,
        game_loader_class,
        method_class,
        sampler_class,
        ModelInterface,
    )))
    if num_models is None:
        option_spec.merge(ModelLoader.get_option_spec(model_class))
    else:
        for i in range(num_models):
            option_spec.merge(
                ModelLoader.get_option_spec(model_class, model_idx=i))
    if additional_to_load:
        for additional_option_spec, _ in additional_to_load.values():
            option_spec.merge(additional_option_spec)

    option_map = option_spec.parse(overrides=overrides)

    global_logger_configurator = logging.GlobalLoggingConfigurator(option_map)
    global_logger_configurator.configure()

    game = game_loader_class(option_map)
    method = method_class(option_map)
    sampler = sampler_class(option_map)
    mi = ModelInterface(option_map)

    # You might want multiple models loaded.
    if num_models is None:
        model_loaders = [ModelLoader(option_map, model_class)]
    else:
        model_loaders = [ModelLoader(option_map, model_class, model_idx=i)
                         for i in range(num_models)]

    env = dict(
        game=game,
        method=method,
        sampler=sampler,
        model_loaders=model_loaders,
        mi=mi,
    )

    if additional_to_load:
        for name, (_, option_map_callable) in additional_to_load.items():
            env[name] = option_map_callable(option_map)

    pretty_option_str = pprint.pformat(option_map.getOptionDict(), width=50)
    logger.info(f'Parsed options:\n{pretty_option_str}')
    logger.info('Finished loading env')

    return env
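# Hedged usage sketch (not part of the original module): shows how load_env
# might be driven from a training script, per the docstring above. `ExtraSetup`
# is a hypothetical class exposing get_option_spec() and a constructor that
# takes the parsed option map; the `game`, `model_file`, and `model`
# environment variables are assumed to be set as described.
def _example_load_env_usage():
    additional = {
        'extra_setup': (ExtraSetup.get_option_spec(),
                        lambda option_map: ExtraSetup(option_map)),
    }
    env = load_env(os.environ, additional_to_load=additional)
    game, method, sampler = env['game'], env['method'], env['sampler']
    extra_setup = env['extra_setup']
    return game, method, sampler, extra_setup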
def get_option_spec(cls, model_class=None, model_idx=None):
    spec = PyOptionSpec()
    spec.addStrOption(
        'load',
        'load model',
        '')
    spec.addStrListOption(
        'onload',
        ('functions to call after loading. e.g., reset,zero_first_layer. '
         'These functions are specified in the model'),
        [])
    spec.addStrListOption(
        'omit_keys',
        'omitted keys when loading',
        [])
    spec.addStrListOption(
        'replace_prefix',
        'replace prefix',
        [])
    spec.addIntOption(
        'gpu',
        'which GPU to use',
        -1)
    spec.addBoolOption(
        'check_loaded_options',
        'Toggles consistency check of loaded vs. current model options.',
        True)
    spec.addBoolOption(
        'use_fp16',
        'use_fp16',
        False)
    spec.addFloatOption(
        'load_model_sleep_interval',
        ('If zero, has no effect. If positive, then before loading the '
         'model, we will sleep for an interval of '
         'duration (secs) ~ Uniform[0, load_model_sleep_interval]'),
        0.0)

    if model_class is not None and hasattr(model_class, 'get_option_spec'):
        spec.merge(model_class.get_option_spec())

    # Suffix option names with the model index so that several model
    # loaders can coexist in the same option map.
    idx_suffix = '' if model_idx is None else str(model_idx)
    spec.addPrefixSuffixToOptionNames('', idx_suffix)

    return spec
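# Hedged sketch (not in the original source): when several models are needed,
# the loader spec above is merged once per index, with option names suffixed
# via addPrefixSuffixToOptionNames, mirroring what load_env does internally.
# `MyModel` is a hypothetical model class exposing get_option_spec().
def _example_two_model_specs():
    spec = PyOptionSpec()
    for i in range(2):
        spec.merge(ModelLoader.get_option_spec(MyModel, model_idx=i))
    option_map = spec.parse()
    loaders = [ModelLoader(option_map, MyModel, model_idx=i) for i in range(2)]
    return loaders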
def get_option_spec(cls):
    spec = PyOptionSpec()
    spec.addIntOption(
        'freq_update',
        'frequency of model update',
        1)
    spec.addBoolOption(
        'save_first',
        'save first model',
        False)
    spec.addIntOption(
        'num_games',
        'number of games',
        1024)
    spec.addIntOption(
        'batchsize',
        'batch size',
        128)
    spec.merge(Evaluator.get_option_spec('trainer'))
    spec.merge(ModelSaver.get_option_spec())
    return spec
def get_option_spec(cls):
    spec = PyOptionSpec()
    spec.addIntOption(
        'freq_update',
        'frequency of model update',
        1)
    spec.addBoolOption(
        'save_first',
        'save first model',
        False)
    spec.addIntOption(
        'num_games',
        'number of games',
        1024)
    spec.addIntOption(
        'batchsize',
        'batch size',
        128)
    spec.merge(Evaluator.get_option_spec('trainer'))
    spec.merge(ModelSaver.get_option_spec())
    return spec
def get_option_spec(cls):
    spec = PyOptionSpec()
    spec.addBoolOption('actor_only', 'TODO: fill this help message in', False)
    spec.addStrListOption(
        'list_files',
        'Provide a list of json files for offline training',
        [])
    spec.addIntOption('port', 'TODO: fill this help message in', 5556)
    spec.addStrOption('server_addr', 'TODO: fill this help message in', '')
    spec.addStrOption('server_id', 'TODO: fill this help message in', '')
    spec.addIntOption('q_min_size', 'TODO: fill this help message in', 10)
    spec.addIntOption('q_max_size', 'TODO: fill this help message in', 1000)
    spec.addIntOption('num_reader', 'TODO: fill this help message in', 50)
    spec.addIntOption('num_reset_ranking', 'TODO: fill this help message in', 5000)
    spec.addIntOption(
        'client_max_delay_sec',
        'Maximum amount of allowed delays in sec. If the client '
        'didn\'t respond after that, we think it is dead.',
        1200)
    spec.addBoolOption('verbose', 'TODO: fill this help message in', False)
    spec.addBoolOption('keep_prev_selfplay', 'TODO: fill this help message in', False)
    spec.addIntOption(
        'num_games_per_thread',
        ('For offline mode, it is the number of concurrent games per '
         'thread, used to increase diversity of games; for selfplay mode, '
         'it is the number of games played at each thread, and after that '
         'we need to call restartAllGames() to resume.'),
        -1)
    spec.addIntOption('expected_num_clients', 'Expected number of clients', -1)
    spec.addIntOption('checkers_num_future_actions', 'TODO: fill this help message in', 1)
    spec.addStrOption('mode', 'TODO: fill this help message in', 'play')
    spec.addBoolOption('black_use_policy_network_only', 'TODO: fill this help message in', False)
    spec.addBoolOption('white_use_policy_network_only', 'TODO: fill this help message in', False)
    spec.addBoolOption('use_mcts', 'TODO: fill this help message in', False)
    spec.addBoolOption('use_mcts_ai2', 'TODO: fill this help message in', False)
    spec.addFloatOption(
        'white_puct',
        'PUCT for white when it is > 0.0. If it is -1 then we use '
        'the same puct for both sides (specified by mcts_options). '
        'A HACK to use different puct for different models. Should '
        'be replaced by a more systematic approach.',
        -1.0)
    spec.addIntOption('white_mcts_rollout_per_batch', 'white mcts rollout per batch', -1)
    spec.addIntOption('white_mcts_rollout_per_thread', 'white mcts rollout per thread', -1)
    spec.addStrOption('dump_record_prefix', 'TODO: fill this help message in', '')
    spec.addStrOption('selfplay_records_directory', 'TODO: fill this help message in', '')
    spec.addStrOption('eval_records_directory', 'TODO: fill this help message in', '')
    spec.addStrOption('records_buffer_directory', 'TODO: fill this help message in', '')
    spec.addIntOption('policy_distri_cutoff', 'first N moves will be played randomly', 0)
    spec.addIntOption('selfplay_timeout_usec', 'TODO: fill this help message in', 0)
    spec.addIntOption('gpu', 'TODO: fill this help message in', -1)
    spec.addBoolOption('policy_distri_training_for_all', 'TODO: fill this help message in', False)
    spec.addBoolOption('parameter_print', 'TODO: fill this help message in', True)
    spec.addIntOption('batchsize', 'batch size', 128)
    spec.addIntOption('batchsize2', 'batch size', -1)
    spec.addIntOption('T', 'number of timesteps', 6)
    spec.addIntOption(
        'selfplay_init_num',
        ('Initial number of selfplay games to generate before training a '
         'new model'),
        2000)
    spec.addIntOption(
        'selfplay_update_num',
        ('Additional number of selfplay games to generate after a model '
         'is updated'),
        1000)
    spec.addBoolOption('selfplay_async', 'Whether to use async mode in selfplay', False)
    spec.addIntOption(
        'eval_num_games',
        ('number of evaluations to be performed to decide whether a model '
         'is better than the other'),
        400)
    spec.addFloatOption('eval_winrate_thres', 'Win rate threshold for evaluation', 0.55)
    spec.addIntOption(
        'eval_old_model',
        ('If specified, then we directly switch to evaluation mode '
         'between the loaded model and the old model specified by this '
         'switch'),
        -1)
    spec.addStrOption(
        'eval_model_pair',
        ('If specified for df_selfplay.py, then the two models will be '
         'evaluated on this client'),
        '')
    spec.addBoolOption(
        'cheat_eval_new_model_wins_half',
        'When enabled, in evaluation mode, when the game '
        'finishes, the player with the most recent model gets 100% '
        'win rate half of the time. '
        'This is used to test the framework',
        False)
    spec.addBoolOption(
        'cheat_selfplay_random_result',
        'When enabled, in selfplay mode the result of the game is random. '
        'This is used to test the framework',
        False)
    spec.addBoolOption('human_plays_for_black', '', False)
    spec.addIntOption(
        'suicide_after_n_games',
        'return after n games have finished, -1 means it never ends',
        -1)
    spec.merge(PyOptionSpec.fromClasses((ContextArgs, MoreLabels)))
    return spec
def get_option_spec(cls):
    spec = PyOptionSpec()
    spec.addIntOption(
        'num_games',
        'number of games',
        1024)
    spec.addIntOption(
        'batchsize',
        'batch size',
        128)
    spec.addIntOption(
        'T',
        'number of timesteps',
        6)
    spec.addBoolOption(
        'verbose_comm',
        'enables verbose comm',
        False)
    spec.addIntOption(
        'mcts_threads',
        'number of MCTS threads',
        0)
    spec.addIntOption(
        'mcts_rollout_per_batch',
        'Batch size for mcts rollout',
        1)
    spec.addIntOption(
        'mcts_rollout_per_thread',
        'number of rollouts per MCTS thread',
        1)
    spec.addBoolOption(
        'mcts_verbose',
        'enables mcts verbosity',
        False)
    spec.addBoolOption(
        'mcts_verbose_time',
        'enables mcts verbosity for time stats',
        False)
    spec.addBoolOption(
        'mcts_persistent_tree',
        'use persistent tree in MCTS',
        False)
    spec.addBoolOption(
        'mcts_use_prior',
        'use prior in MCTS',
        False)
    spec.addIntOption(
        'mcts_virtual_loss',
        '"virtual" number of losses for MCTS edges',
        0)
    spec.addStrOption(
        'mcts_pick_method',
        'criterion for mcts node selection',
        'most_visited')
    spec.addFloatOption(
        'mcts_puct',
        'prior weight',
        1.0)
    spec.addFloatOption(
        'mcts_epsilon',
        'for exploration enhancement, weight of randomization',
        0.0)
    spec.addFloatOption(
        'mcts_alpha',
        'for exploration enhancement, alpha term in gamma distribution',
        0.0)
    spec.addBoolOption(
        "mcts_unexplored_q_zero",
        'set all unexplored nodes to have Q value zero',
        False)
    spec.addBoolOption(
        "mcts_root_unexplored_q_zero",
        'set unexplored children of root node to have Q value zero',
        False)
    return spec