Example #1
    def get_option_spec(cls):
        spec = PyOptionSpec()
        spec.addStrOption('value_node', 'name of the value node', 'V')

        spec.merge(
            PyOptionSpec.fromClasses(
                (PolicyGradient, DiscountedReward, ValueMatcher)))

        return spec
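The spec returned by get_option_spec is not consumed directly: it is merged into a top-level PyOptionSpec, parsed into an option map, and the option map is then passed to the class constructor. A minimal sketch of that pattern follows, mirroring Examples #8 and #12 below; ActorCritic here is a hypothetical class assumed to define the get_option_spec above.

# Hypothetical consumer of a get_option_spec like the one above.
# ActorCritic is a placeholder name, not a class from this listing.
option_spec = PyOptionSpec()
option_spec.merge(PyOptionSpec.fromClasses((ActorCritic,)))

# Parse command-line arguments into an option map and hand it to the
# class, which reads its registered options from the map.
option_map = option_spec.parse()
method = ActorCritic(option_map)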
Example #2
    def get_option_spec(cls):
        spec = PyOptionSpec()
        spec.addStrOption('preload_sgf', 'TODO: fill this help message in', '')
        spec.addIntOption('preload_sgf_move_to',
                          'TODO: fill this help message in', -1)
        spec.addStrOption('mode', 'TODO: fill this help message in', "online")
        spec.addBoolOption('actor_only', 'TODO: fill this help message in',
                           False)
        spec.addIntOption('num_reset_ranking',
                          'TODO: fill this help message in', 5000)
        spec.addBoolOption('verbose', 'TODO: fill this help message in', False)
        spec.addBoolOption('print_result', 'TODO: fill this help message in',
                           False)
        spec.addIntOption('data_aug',
                          'specify data augmentation, 0-7, -1 means random',
                          -1)
        spec.addIntOption(
            'num_games_per_thread',
            ('For offline mode, it is the number of concurrent games per '
             'thread, used to increase diversity of games; for selfplay mode, '
             'it is the number of games played at each thread, and after that '
             'we need to call restartAllGames() to resume.'), -1)
        spec.addIntOption('num_future_actions',
                          'TODO: fill this help message in', 1)
        spec.addIntOption('move_cutoff', 'Cutoff ply in replay', -1)
        spec.addBoolOption('black_use_policy_network_only',
                           'TODO: fill this help message in', False)
        spec.addIntOption('ply_pass_enabled',
                          'TODO: fill this help message in', 0)
        spec.addBoolOption('use_mcts', 'TODO: fill this help message in',
                           False)
        spec.addBoolOption('use_df_feature', 'TODO: fill this help message in',
                           False)
        spec.addStrOption('dump_record_prefix',
                          'TODO: fill this help message in', '')
        spec.addFloatOption('resign_thres', 'TODO: fill this help message in',
                            0.0)
        spec.addBoolOption('following_pass', 'TODO: fill this help message in',
                           False)
        spec.addIntOption('gpu', 'TODO: fill this help message in', -1)
        spec.addBoolOption('parameter_print',
                           'TODO: fill this help message in', True)
        spec.addIntOption('batchsize', 'batch size', 128)
        spec.addIntOption('batchsize2', 'batch size', -1)
        spec.addFloatOption('eval_winrate_thres',
                            'Win rate threshold for evaluation', 0.55)
        spec.addIntOption(
            'suicide_after_n_games',
            'return after n games have finished, -1 means it never ends', -1)

        spec.merge(PyOptionSpec.fromClasses((ContextArgs, MoreLabels)))

        return spec
Example #3
    def get_option_spec(cls):
        spec = PyOptionSpec()
        spec.addStrOption(
            'value_node',
            'name of the value node',
            'V')

        spec.merge(PyOptionSpec.fromClasses(
            (PolicyGradient, DiscountedReward, ValueMatcher)
        ))

        return spec
Example #4
    def get_option_spec(cls):
        spec = PyOptionSpec()
        spec.addStrOption('value_node', 'name of the value node', 'V')
        spec.addFloatOption('adv_clip',
                            'clip value of advantage. 0.0 means no clipping',
                            0.0)

        spec.merge(
            PyOptionSpec.fromClasses(
                (PolicyGradient, DiscountedReward, ValueMatcher)))

        return spec
Example #5
    def get_option_spec(cls):
        spec = PyOptionSpec()
        test.setSpecOptions(spec.getOptionSpec())
        elf_C.setSpecELFOptions(spec.getOptionSpec())
        spec.addIntOption('gpu', 'GPU id to use', 0)
        spec.addIntOption('freq_update',
                          'How many updates before updating the acting model',
                          50)
        spec.addStrOption('distri_mode', 'server or client', "")
        spec.addIntOption('num_recv', '', 2)
        spec.addStrListOption('parsed_args', 'dummy option', [])
        spec.merge(PyOptionSpec.fromClasses((PPO, )))
        return spec
Example #6
File: server.py  Project: qucheng/ELF-1
    def get_option_spec(cls):
        spec = PyOptionSpec()
        go.getServerPredefined(spec.getOptionSpec())

        spec.addIntOption('gpu', 'GPU id to use', -1)
        spec.addIntOption(
            'eval_old_model',
            ('If specified, then we directly switch to evaluation mode '
             'between the loaded model and the old model specified by this '
             'switch'), -1)
        spec.addStrOption('comment', 'Comment for this run', '')
        spec.addBoolOption("parameter_print", "Print parameters", True)

        spec.merge(PyOptionSpec.fromClasses((MoreLabels, )))
        return spec
Example #7
    def get_option_spec(cls):
        spec = PyOptionSpec()
        go.getClientPredefined(spec.getOptionSpec())

        spec.addIntOption('gpu', 'GPU id to use', -1)
        spec.addStrOption(
            'eval_model_pair',
            ('If specified for df_selfplay.py, then the two models will be '
             'evaluated on this client'), '')
        spec.addStrOption('comment', 'Comment for this run', '')

        spec.addIntOption('selfplay_timeout_usec', 'Timeout used for MCTS', 10)

        spec.addBoolOption("parameter_print", "Print parameters", True)

        spec.merge(PyOptionSpec.fromClasses((MoreLabels, )))
        return spec
Example #8
File: server.py  Project: qucheng/ELF-1
def main():
    print(sys.version)
    print(torch.__version__)
    print(torch.version.cuda)
    print("Conda env: \"%s\"" % os.environ.get("CONDA_DEFAULT_ENV", ""))

    option_spec = PyOptionSpec()
    option_spec.merge(PyOptionSpec.fromClasses((RunGC,)))
    option_map = option_spec.parse()

    rungc = RunGC(option_map)
    rungc.initialize()

    num_batch = 10000000
    rungc.wrapper.start()

    for i in range(num_batch):
        rungc.wrapper.run()

    rungc.wrapper.stop()
Example #9
File: game.py  Project: alatyshe/ELF
    def get_option_spec(cls):
        spec = PyOptionSpec()
        spec.addBoolOption('actor_only', 'TODO: fill this help message in',
                           False)
        spec.addStrListOption(
            'list_files', 'Provide a list of json files for offline training',
            [])
        spec.addIntOption('port', 'TODO: fill this help message in', 5556)
        spec.addStrOption('server_addr', 'TODO: fill this help message in', '')
        spec.addStrOption('server_id', 'TODO: fill this help message in', '')
        spec.addIntOption('q_min_size', 'TODO: fill this help message in', 10)
        spec.addIntOption('q_max_size', 'TODO: fill this help message in',
                          1000)
        spec.addIntOption('num_reader', 'TODO: fill this help message in', 50)
        spec.addIntOption('num_reset_ranking',
                          'TODO: fill this help message in', 5000)
        spec.addIntOption(
            'client_max_delay_sec',
            'Maximum allowed delay in seconds. If the client has not '
            'responded after that, we consider it dead.', 1200)
        spec.addBoolOption('verbose', 'TODO: fill this help message in', False)
        spec.addBoolOption('keep_prev_selfplay',
                           'TODO: fill this help message in', False)
        spec.addIntOption(
            'num_games_per_thread',
            ('For offline mode, it is the number of concurrent games per '
             'thread, used to increase diversity of games; for selfplay mode, '
             'it is the number of games played at each thread, and after that '
             'we need to call restartAllGames() to resume.'), -1)
        spec.addIntOption('expected_num_clients', 'Expected number of clients',
                          -1)
        spec.addIntOption('checkers_num_future_actions',
                          'TODO: fill this help message in', 1)
        spec.addStrOption('mode', 'TODO: fill this help message in', 'play')
        spec.addBoolOption('black_use_policy_network_only',
                           'TODO: fill this help message in', False)
        spec.addBoolOption('white_use_policy_network_only',
                           'TODO: fill this help message in', False)
        spec.addBoolOption('use_mcts', 'TODO: fill this help message in',
                           False)
        spec.addBoolOption('use_mcts_ai2', 'TODO: fill this help message in',
                           False)
        spec.addFloatOption(
            'white_puct',
            'PUCT for white when it is > 0.0. If it is -1 then we use '
            'the same puct for both sides (specified by mcts_options). '
            'A HACK to use different puct for different models. Should '
            'be replaced by a more systematic approach.', -1.0)
        spec.addIntOption('white_mcts_rollout_per_batch',
                          'white mcts rollout per batch', -1)
        spec.addIntOption('white_mcts_rollout_per_thread',
                          'white mcts rollout per thread', -1)
        spec.addStrOption('dump_record_prefix',
                          'TODO: fill this help message in', '')
        spec.addStrOption('selfplay_records_directory',
                          'TODO: fill this help message in', '')
        spec.addStrOption('eval_records_directory',
                          'TODO: fill this help message in', '')
        spec.addStrOption('records_buffer_directory',
                          'TODO: fill this help message in', '')
        spec.addIntOption('policy_distri_cutoff',
                          'first N moves will be sampled randomly', 0)
        spec.addIntOption('selfplay_timeout_usec',
                          'TODO: fill this help message in', 0)
        spec.addIntOption('gpu', 'TODO: fill this help message in', -1)
        spec.addBoolOption('policy_distri_training_for_all',
                           'TODO: fill this help message in', False)
        spec.addBoolOption('parameter_print',
                           'TODO: fill this help message in', True)
        spec.addIntOption('batchsize', 'batch size', 128)
        spec.addIntOption('batchsize2', 'batch size', -1)
        spec.addIntOption('T', 'number of timesteps', 6)
        spec.addIntOption(
            'selfplay_init_num',
            ('Initial number of selfplay games to generate before training a '
             'new model'), 2000)
        spec.addIntOption(
            'selfplay_update_num',
            ('Additional number of selfplay games to generate after a model '
             'is updated'), 1000)
        spec.addBoolOption('selfplay_async',
                           ('Whether to use async mode in selfplay'), False)
        spec.addIntOption(
            'eval_num_games',
            ('number of evaluation games to be played to decide whether a model '
             'is better than the other'), 400)
        spec.addFloatOption('eval_winrate_thres',
                            'Win rate threshold for evaluation', 0.55)
        spec.addIntOption(
            'eval_old_model',
            ('If specified, then we directly switch to evaluation mode '
             'between the loaded model and the old model specified by this '
             'switch'), -1)
        spec.addStrOption(
            'eval_model_pair',
            ('If specified for df_selfplay.py, then the two models will be '
             'evaluated on this client'), '')
        spec.addBoolOption(
            'cheat_eval_new_model_wins_half',
            'When enabled, in evaluation mode, when the game '
            'finishes, the player with the most recent model gets 100% '
            'win rate half of the time. '
            'This is used to test the framework.', False)
        spec.addBoolOption(
            'cheat_selfplay_random_result',
            'When enabled, in selfplay mode the result of the game is random. '
            'This is used to test the framework.', False)
        spec.addBoolOption('human_plays_for_black', '', False)
        spec.addIntOption(
            'suicide_after_n_games',
            'return after n games have finished, -1 means it never ends', -1)

        spec.merge(PyOptionSpec.fromClasses((ContextArgs, MoreLabels)))
        return spec
Example #10
def load_env(
    envs,
    num_models=None,
    overrides=None,
    additional_to_load=None):
  """Load envs.

  Envs will be specified as environment variables. Specifically, the
  environment variables ``game``, ``model_file`` and ``model`` are
  required.

  ``additional_to_load`` is a dict with the following format:

    {'variable_name': (option_spec, callable)}

  For each element in ``additional_to_load``, ``load_env`` will parse
  the ``option_spec``, pass the resulting option map to ``callable``,
  and store the result of ``callable`` in the return value
  (under the key ``variable_name``), as sketched after this example.

  Returns:
    env: dict of
      ``game`` : game module
      ``method``: Learning method used
      ``model_loaders``: loaders for model
  """
  logger = logging.getIndexedLogger('\u001b[31;1m|py|\u001b[0mrlpytorch.model_loader.load_env', '')
  logger.info('Loading env')


  game_loader_class = load_module(envs["game"]).Loader
  logger.info(f'\u001b[32;1mModule game successfully loaded :\u001b[0m {envs["game"]}')

  model_file = load_module(envs["model_file"])
  logger.info(f'\u001b[32;1mModule model_file successfully loaded :\u001b[0m {envs["model_file"]}')

  # TODO This is not good, need to fix.
  if len(model_file.Models[envs["model"]]) == 2:
    model_class, method_class = model_file.Models[envs["model"]]
    sampler_class = Sampler
  else:
    model_class, method_class, sampler_class = \
      model_file.Models[envs["model"]]

  overrides = dict(overrides) if overrides else {}
  overrides.update(getattr(model_file, "Overrides", {}))

  option_spec = PyOptionSpec()
  option_spec.merge(PyOptionSpec.fromClasses((
    logging.GlobalLoggingConfigurator,
    game_loader_class,
    method_class,
    sampler_class,
    ModelInterface,
  )))

  if num_models is None:
    option_spec.merge(ModelLoader.get_option_spec(model_class))
  else:
    for i in range(num_models):
      option_spec.merge(
        ModelLoader.get_option_spec(model_class, model_idx=i))
  if additional_to_load:
    for additional_option_spec, _ in additional_to_load.values():
      option_spec.merge(additional_option_spec)

  option_map = option_spec.parse(overrides=overrides)

  global_logger_configurator = logging.GlobalLoggingConfigurator(option_map)
  global_logger_configurator.configure()

  game = game_loader_class(option_map)
  method = method_class(option_map)
  sampler = sampler_class(option_map)
  mi = ModelInterface(option_map)

  # You might want multiple models loaded.
  if num_models is None:
    model_loaders = [ModelLoader(option_map, model_class)]
  else:
    model_loaders = [ModelLoader(option_map, model_class, model_idx=i)
             for i in range(num_models)]

  env = dict(
    game=game,
    method=method,
    sampler=sampler,
    model_loaders=model_loaders,
    mi=mi,
  )
  if additional_to_load:
    for name, (_, option_map_callable) in additional_to_load.items():
      env[name] = option_map_callable(option_map)


  pretty_option_str = pprint.pformat(option_map.getOptionDict(), width=50)
  logger.info(f'Parsed options:\n{pretty_option_str}')
  logger.info('Finished loading env')

  # env_str = ""
  # for i in env:
  #     env_str += f"{i} : {env[i]}\n"
  # logger.info(f"env :\n{env_str}")
  return env
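Per the docstring above, each value in additional_to_load is a pair (option_spec, callable): the spec is merged before parsing, and the callable is invoked with the parsed option map, with its result stored in the returned dict under the dict key. A hedged usage sketch follows; Evaluator is a placeholder class assumed to follow the get_option_spec pattern shown in the other examples, and the game/model_file/model environment variables are assumed to be set.

import os

# Hypothetical extra object to construct alongside the env (placeholder name).
additional_to_load = {
    'evaluator': (
        Evaluator.get_option_spec(),               # merged before parsing
        lambda option_map: Evaluator(option_map),  # called with the parsed map
    ),
}

env = load_env(os.environ, additional_to_load=additional_to_load)
evaluator = env['evaluator']  # the callable's result, stored under the dict key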
Example #11
File: game.py  Project: bearrundr/ELF
    def get_option_spec(cls):
        spec = PyOptionSpec()
        spec.addStrOption(
            'preload_sgf',
            'TODO: fill this help message in',
            '')
        spec.addIntOption(
            'preload_sgf_move_to',
            'TODO: fill this help message in',
            -1)
        spec.addBoolOption(
            'actor_only',
            'TODO: fill this help message in',
            False)
        spec.addStrListOption(
            'list_files',
            'Provide a list of json files for offline training',
            [])
        spec.addIntOption(
            'port',
            'TODO: fill this help message in',
            5556)
        spec.addStrOption(
            'server_addr',
            'TODO: fill this help message in',
            '')
        spec.addStrOption(
            'server_id',
            'TODO: fill this help message in',
            '')
        spec.addIntOption(
            'q_min_size',
            'TODO: fill this help message in',
            10)
        spec.addIntOption(
            'q_max_size',
            'TODO: fill this help message in',
            1000)
        spec.addIntOption(
            'num_reader',
            'TODO: fill this help message in',
            50)
        spec.addIntOption(
            'num_reset_ranking',
            'TODO: fill this help message in',
            5000)
        spec.addIntOption(
            'client_max_delay_sec',
            'Maximum allowed delay in seconds. If the client has not '
            'responded after that, we consider it dead.',
            1200)
        spec.addBoolOption(
            'verbose',
            'TODO: fill this help message in',
            False)
        spec.addBoolOption(
            'keep_prev_selfplay',
            'TODO: fill this help message in',
            False)
        spec.addBoolOption(
            'print_result',
            'TODO: fill this help message in',
            False)
        spec.addIntOption(
            'data_aug',
            'specify data augmentation, 0-7, -1 means random',
            -1)
        spec.addIntOption(
            'ratio_pre_moves',
            ('how many moves to perform in each thread, before we use the '
             'data to train the model'),
            0)
        spec.addFloatOption(
            'start_ratio_pre_moves',
            ('how many moves to perform in each thread, before we use the '
             'first sgf file to train the model'),
            0.5)
        spec.addIntOption(
            'num_games_per_thread',
            ('For offline mode, it is the number of concurrent games per '
             'thread, used to increase diversity of games; for selfplay mode, '
             'it is the number of games played at each thread, and after that '
             'we need to call restartAllGames() to resume.'),
            -1)
        spec.addIntOption(
            'expected_num_clients',
            'Expected number of clients',
            -1
        )
        spec.addIntOption(
            'num_future_actions',
            'TODO: fill this help message in',
            1)
        spec.addIntOption(
            'move_cutoff',
            'Cutoff ply in replay',
            -1)
        spec.addStrOption(
            'mode',
            'TODO: fill this help message in',
            'online')
        spec.addBoolOption(
            'black_use_policy_network_only',
            'TODO: fill this help message in',
            False)
        spec.addBoolOption(
            'white_use_policy_network_only',
            'TODO: fill this help message in',
            False)
        spec.addIntOption(
            'ply_pass_enabled',
            'TODO: fill this help message in',
            0)
        spec.addBoolOption(
            'use_mcts',
            'TODO: fill this help message in',
            False)
        spec.addBoolOption(
            'use_mcts_ai2',
            'TODO: fill this help message in',
            False)
        spec.addFloatOption(
            'white_puct',
            'PUCT for white when it is > 0.0. If it is -1 then we use '
            'the same puct for both sides (specified by mcts_options). '
            'A HACK to use different puct for different models. Should '
            'be replaced by a more systematic approach.',
            -1.0)
        spec.addIntOption(
            'white_mcts_rollout_per_batch',
            'white mcts rollout per batch',
            -1)
        spec.addIntOption(
            'white_mcts_rollout_per_thread',
            'white mcts rollout per thread',
            -1)
        spec.addBoolOption(
            'use_df_feature',
            'TODO: fill this help message in',
            False)
        spec.addStrOption(
            'dump_record_prefix',
            'TODO: fill this help message in',
            '')
        spec.addIntOption(
            'policy_distri_cutoff',
            'TODO: fill this help message in',
            0)
        spec.addFloatOption(
            'resign_thres',
            'TODO: fill this help message in',
            0.0)
        spec.addBoolOption(
            'following_pass',
            'TODO: fill this help message in',
            False)
        spec.addIntOption(
            'selfplay_timeout_usec',
            'TODO: fill this help message in',
            0)
        spec.addIntOption(
            'gpu',
            'TODO: fill this help message in',
            -1)
        spec.addBoolOption(
            'policy_distri_training_for_all',
            'TODO: fill this help message in',
            False)
        spec.addBoolOption(
            'parameter_print',
            'TODO: fill this help message in',
            True)
        spec.addIntOption(
            'batchsize',
            'batch size',
            128)
        spec.addIntOption(
            'batchsize2',
            'batch size',
            -1)
        spec.addIntOption(
            'T',
            'number of timesteps',
            6)
        spec.addIntOption(
            'selfplay_init_num',
            ('Initial number of selfplay games to generate before training a '
             'new model'),
            2000)
        spec.addIntOption(
            'selfplay_update_num',
            ('Additional number of selfplay games to generate after a model '
             'is updated'),
            1000)
        spec.addBoolOption(
            'selfplay_async',
            ('Whether to use async mode in selfplay'),
            False)
        spec.addIntOption(
            'eval_num_games',
            ('number of evaluation games to be played to decide whether a model '
             'is better than the other'),
            400)
        spec.addFloatOption(
            'eval_winrate_thres',
            'Win rate threshold for evaluation',
            0.55)
        spec.addIntOption(
            'eval_old_model',
            ('If specified, then we directly switch to evaluation mode '
             'between the loaded model and the old model specified by this '
             'switch'),
            -1)
        spec.addStrOption(
            'eval_model_pair',
            ('If specified for df_selfplay.py, then the two models will be '
             'evaluated on this client'),
            '')
        spec.addStrOption(
            'comment',
            'Comment for this run',
            '')
        spec.addBoolOption(
            'cheat_eval_new_model_wins_half',
            'When enabled, in evaluation mode, when the game '
            'finishes, the player with the most recent model gets 100% '
            'win rate half of the time. '
            'This is used to test the framework.',
            False)
        spec.addBoolOption(
            'cheat_selfplay_random_result',
            'When enabled, in selfplay mode the result of the game is random. '
            'This is used to test the framework.',
            False)
        spec.addIntOption(
            'suicide_after_n_games',
            'return after n games have finished, -1 means it never ends',
            -1)

        spec.merge(PyOptionSpec.fromClasses((ContextArgs, MoreLabels)))

        return spec
Example #12
    def on_batch(self, batch):
        #print("Receive batch: ", batch.smem.info(),
        #      ", curr_batchsize: ", str(batch.batchsize), sep='')
        #print(batch["s"])
        # print("Actor: " + str(datetime.timestamp(datetime.now())))
        #print("on actor")
        # n = self.params["num_action"]
        # print(batch.batchsize)
        res = self.model(batch)
        res["V"].data /= 10.0
        #print("s: %f, V: %f, pi: %s" % (batch["s"].data.item(), res["V"].data.item(), str(res["pi"].data)))
        return dict(V=res["V"].data, pi=res["pi"].data)

if __name__ == '__main__':
    option_spec = PyOptionSpec()
    option_spec.merge(PyOptionSpec.fromClasses((RunGC,)))
    option_map = option_spec.parse()

    rungc = RunGC(option_map)
    rungc.initialize()

    num_batch = 10000000
    start = time.perf_counter()

    rungc.wrapper.start()

    for i in range(num_batch):
        rungc.wrapper.run()

    elapsed = time.perf_counter() - start
    print("Time (s) per batch: %.6f sec" % (elapsed / num_batch))