Example No. 1
def _start_learning(config, end_stats=None, checkpoint=None):
    """Spawn actor and evaluator processes and run the learner to completion."""
    print("Writing logs and checkpoints to:", config.path)
    print("Model type: %s(%s, %s)" %
          (config.nn_model, config.nn_width, config.nn_depth))

    with open(os.path.join(config.path, "config.json"), "w") as fp:
        fp.write(json.dumps(config._asdict(), indent=2, sort_keys=True) + "\n")

    game = pyspiel.load_game(config.game)

    actors = [
        spawn.Process(actor,
                      kwargs={
                          "game": game,
                          "config": config,
                          "num": i,
                          "checkpoint": checkpoint
                      }) for i in range(config.actors)
    ]
    evaluators = [
        spawn.Process(evaluator,
                      kwargs={
                          "game": game,
                          "config": config,
                          "num": i,
                          "checkpoint": checkpoint
                      }) for i in range(config.evaluators)
    ]

    def broadcast(msg):
        for proc in actors + evaluators:
            proc.queue.put(msg)

    try:
        learner(
            game=game,
            config=config,
            actors=actors,  # pylint: disable=missing-kwoa
            evaluators=evaluators,
            broadcast_fn=broadcast,
            end_stats=end_stats,
            checkpoint=checkpoint)
    except (KeyboardInterrupt, EOFError):
        print("Caught a KeyboardInterrupt, stopping early.")
    finally:
        broadcast("")
        for proc in actors + evaluators:
            proc.join()
Example No. 2
  def test_spawn_works(self):
    """Round-trips work items through spawned workers and checks the results."""
    max_sleep_time = 0.01  # 10ms

    def worker_fn(worker_id, queue):
      queue.put(worker_id)  # Show it's up and running.
      random.seed(time.time() + worker_id)
      while True:
        value = queue.get()
        if value is None:
          break
        time.sleep(max_sleep_time * random.random())
        queue.put((worker_id, value))

    num_workers = 5
    workers = [spawn.Process(worker_fn, kwargs={"worker_id": i})
               for i in range(num_workers)]

    # Make sure they're warmed up.
    for worker_id, worker in enumerate(workers):
      self.assertEqual(worker_id, worker.queue.get())

    num_work_units = 40
    expected_output = []
    for worker_id, worker in enumerate(workers):
      for i in range(num_work_units):
        worker.queue.put(i)
        expected_output.append((worker_id, i))
      worker.queue.put(None)

    start_time = time.time()

    output = []
    i = 0
    while len(output) < len(expected_output):
      for worker in workers:
        try:
          output.append(worker.queue.get_nowait())
        except spawn.Empty:
          pass

      time.sleep(0.001)
      i += 1
      self.assertLess(time.time() - start_time,
                      20 * max_sleep_time * num_work_units,
                      msg=f"Don't wait forever. Loop {i}, found {len(output)}")

    time_taken = time.time() - start_time
    print("Finished in {:.3f}s, {:.2f}x the max".format(
        time_taken, time_taken / (max_sleep_time * num_work_units)))

    for worker in workers:
      worker.join()

    # All messages arrived
    self.assertLen(output, len(expected_output))
    self.assertCountEqual(output, expected_output)

    # The messages arrived out of order, showing parallelism.
    self.assertNotEqual(output, expected_output)
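The sketch below is a minimal, self-contained illustration of the fan-out/collect pattern the test above exercises, written against the standard library's multiprocessing module rather than OpenSpiel's spawn wrapper (which, as the test shows, pairs each child process with a queue it can both read work from and write results to). All names in the sketch are illustrative and not part of OpenSpiel.

# Illustration only: plain multiprocessing analogue of the spawn.Process
# worker pattern above. Each worker echoes work items back, tagged with its
# id, until it receives a None sentinel.
import multiprocessing as mp
import queue as queue_lib


def _worker(worker_id, q_in, q_out):
  while True:
    item = q_in.get()
    if item is None:  # Exit sentinel.
      break
    q_out.put((worker_id, item))


def run(num_workers=3, num_items=10):
  q_out = mp.Queue()
  q_ins = [mp.Queue() for _ in range(num_workers)]
  procs = [mp.Process(target=_worker, args=(i, q_ins[i], q_out))
           for i in range(num_workers)]
  for p in procs:
    p.start()

  for q_in in q_ins:  # Fan the work out, then send the exit sentinel.
    for item in range(num_items):
      q_in.put(item)
    q_in.put(None)

  results = []
  while len(results) < num_workers * num_items:
    try:
      results.append(q_out.get(timeout=1))
    except queue_lib.Empty:
      pass

  for p in procs:
    p.join()
  return results


if __name__ == "__main__":
  print(len(run()), "results collected")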
Example No. 3
def alpha_zero(config: Config):
  """Start all the worker processes for a full alphazero setup."""
  game = pyspiel.load_game(config.game)
  config = config._replace(
      observation_shape=game.observation_tensor_shape(),
      output_size=game.num_distinct_actions())

  print("Starting game", config.game)
  if game.num_players() != 2:
    sys.exit("AlphaZero can only handle 2-player games.")
  game_type = game.get_type()
  if game_type.reward_model != pyspiel.GameType.RewardModel.TERMINAL:
    raise ValueError("Game must have terminal rewards.")
  if game_type.dynamics != pyspiel.GameType.Dynamics.SEQUENTIAL:
    raise ValueError("Game must have sequential turns.")
  if game_type.chance_mode != pyspiel.GameType.ChanceMode.DETERMINISTIC:
    raise ValueError("Game must be deterministic.")

  path = config.path
  if not path:
    path = tempfile.mkdtemp(prefix="az-{}-{}-".format(
        datetime.datetime.now().strftime("%Y-%m-%d-%H-%M"), config.game))
    config = config._replace(path=path)

  if not os.path.exists(path):
    os.makedirs(path)
  if not os.path.isdir(path):
    sys.exit("{} isn't a directory".format(path))
  print("Writing logs and checkpoints to:", path)
  print("Model type: %s(%s, %s)" % (config.nn_model, config.nn_width,
                                    config.nn_depth))

  with open(os.path.join(config.path, "config.json"), "w") as fp:
    fp.write(json.dumps(config._asdict(), indent=2, sort_keys=True) + "\n")

  actors = [spawn.Process(actor, kwargs={"game": game, "config": config,
                                         "num": i})
            for i in range(config.actors)]
  evaluators = [spawn.Process(evaluator, kwargs={"game": game, "config": config,
                                                 "num": i})
                for i in range(config.evaluators)]

  def broadcast(msg):
    for proc in actors + evaluators:
      proc.queue.put(msg)

  try:
    learner(game=game, config=config, actors=actors,  # pylint: disable=missing-kwoa
            evaluators=evaluators, broadcast_fn=broadcast)
  except (KeyboardInterrupt, EOFError):
    print("Caught a KeyboardInterrupt, stopping early.")
  finally:
    broadcast("")
    # For the actor processes to join, their input queues must be drained,
    # including any backed-up items.
    for proc in actors:
      while proc.exitcode is None:
        while not proc.queue.empty():
          proc.queue.get_nowait()
        proc.join(JOIN_WAIT_DELAY)
    for proc in evaluators:
      proc.join()
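The config._replace(...) and config._asdict() calls above work because Config is presumably a collections.namedtuple (or typing.NamedTuple): _replace returns an updated copy of the immutable tuple and _asdict yields a plain dict that can be serialized straight to JSON. A tiny self-contained illustration of that pattern, using a made-up MiniConfig stand-in rather than the real, much larger Config:

# Illustration only: MiniConfig is a made-up stand-in for the real Config
# namedtuple, which carries many more fields (network, MCTS and training
# hyperparameters).
import collections
import json

MiniConfig = collections.namedtuple("MiniConfig", ["game", "path", "max_steps"])

config = MiniConfig(game="tic_tac_toe", path="", max_steps=100)
# Namedtuples are immutable; _replace returns a new copy with updated fields.
config = config._replace(path="/tmp/az-demo")
# _asdict gives a plain dict, which is what gets dumped to config.json above.
print(json.dumps(config._asdict(), indent=2, sort_keys=True))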
Example No. 4
    def inner_one(config, model_num):
        """Load and validate the game, then build the actor and evaluator processes."""
        game = pyspiel.load_game(config.game)
        config = config._replace(
            observation_shape=game.observation_tensor_shape(),
            output_size=game.num_distinct_actions())

        print("Starting game", config.game)
        if game.num_players() != 2:
            sys.exit("AlphaZero can only handle 2-player games.")
        game_type = game.get_type()
        if game_type.reward_model != pyspiel.GameType.RewardModel.TERMINAL:
            raise ValueError("Game must have terminal rewards.")
        if game_type.dynamics != pyspiel.GameType.Dynamics.SEQUENTIAL:
            raise ValueError("Game must have sequential turns.")
        if game_type.chance_mode != pyspiel.GameType.ChanceMode.DETERMINISTIC:
            raise ValueError("Game must be deterministic.")

        path = config.path
        if not path:
            path = tempfile.mkdtemp(prefix="az-{}-{}-".format(
                datetime.datetime.now().strftime("%Y-%m-%d-%H-%M"),
                config.game))
            config = config._replace(path=path)

        if not os.path.exists(path):
            os.makedirs(path)
        if not os.path.isdir(path):
            sys.exit("{} isn't a directory".format(path))
        print("Writing logs and checkpoints to:", path)
        print("Model type: %s(%s, %s)" %
              (config.nn_model, config.nn_width, config.nn_depth))

        config_name = "config_1.json" if model_num == 1 else "config_2.json"
        with open(os.path.join(config.path, config_name), "w") as fp:
            fp.write(
                json.dumps(config._asdict(), indent=2, sort_keys=True) +
                "\n")

        actors = [
            spawn.Process(actor,
                          kwargs={
                              "game": game,
                              "config": config,
                              "num": i
                          }) for i in range(config.actors)
        ]
        evaluators = [
            spawn.Process(evaluator,
                          kwargs={
                              "game": game,
                              "config": config,
                              "num": i
                          }) for i in range(config.evaluators)
        ]

        return game, config, actors, evaluators
def from_checkpoint(path, checkpoint, steps):
    """
     Continues AlphaZero training for a model stored in 'path',
     from checkpoint 'checkpoint' (use checkpoint=-1 for most recent
     version of the model). And continues to train for 'steps' number
     of training steps.
  """

    needed_files = [
        "config.json",
        "learner.jsonl",
        "checkpoint-{}.data-00000-of-00001".format(checkpoint),
        "checkpoint-{}.index".format(checkpoint),
        "checkpoint-{}.meta".format(checkpoint),
    ]

    # Ensure the path is correct and all the files exist.
    if not os.path.isdir(path):
        sys.exit("{} is not a directory.".format(path))
    for file in needed_files:
        if not os.path.exists(os.path.join(path, file)):
            sys.exit("Could not find {} in {}.".format(file, path))

    # Read the config json into a dictionary.
    config_json = {}
    with open(os.path.join(path, "config.json"), "r") as f:
        config_json = json.load(f)

    # Read each line of the learner json into a list.
    learner_json_lines = []
    with open(os.path.join(path, "learner.jsonl"), "r") as f:
        learner_json_lines = f.readlines()

    # Get the line we are interested in (specified by the checkpoint).
    learner_end_index = checkpoint if checkpoint == -1 else checkpoint - 1
    learner_json_end = json.loads(learner_json_lines[learner_end_index])

    # Retrieve the statistics and data we need.
    end_stats = _end_stats_from_json(learner_json_end)
    config_json["max_steps"] = end_stats.end_step + steps
    config = _config_from_json(config_json)

    print("Writing logs and checkpoints to:", config.path)
    print("Model type: %s(%s, %s)" %
          (config.nn_model, config.nn_width, config.nn_depth))

    # Save the updated config file.
    with open(os.path.join(config.path, "config.json"), "w") as fp:
        fp.write(json.dumps(config._asdict(), indent=2, sort_keys=True) + "\n")

    # Save the updated learner file if needed.
    if checkpoint != -1:
        _delete_newer_checkpoints(config.path, checkpoint)
        with open(os.path.join(config.path, "learner.jsonl"), "w") as fp:
            fp.write("".join(learner_json_lines[:checkpoint]))

    game = pyspiel.load_game(config.game)

    actors = [
        spawn.Process(actor,
                      kwargs={
                          "game": game,
                          "config": config,
                          "num": i,
                          "checkpoint": checkpoint
                      }) for i in range(config.actors)
    ]
    evaluators = [
        spawn.Process(evaluator,
                      kwargs={
                          "game": game,
                          "config": config,
                          "num": i,
                          "checkpoint": checkpoint
                      }) for i in range(config.evaluators)
    ]

    def broadcast(msg):
        for proc in actors + evaluators:
            proc.queue.put(msg)

    try:
        learner(
            game=game,
            config=config,
            actors=actors,  # pylint: disable=missing-kwoa
            evaluators=evaluators,
            broadcast_fn=broadcast,
            end_stats=end_stats,
            checkpoint=checkpoint)
    except (KeyboardInterrupt, EOFError):
        print("Caught a KeyboardInterrupt, stopping early.")
    finally:
        broadcast("")
        for proc in actors + evaluators:
            proc.join()
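A hedged usage sketch for from_checkpoint: the directory path below is a placeholder, and the call assumes that directory already contains the files listed in needed_files from a previous training run. If spawn here is OpenSpiel's process wrapper, the call would normally also be made under its main-process handler, as in the standard example scripts.

# Hypothetical usage: resume from the most recent checkpoint in an existing
# run directory (the path is a placeholder) and train for 500 more steps.
if __name__ == "__main__":
    from_checkpoint(path="/tmp/az-run-dir", checkpoint=-1, steps=500)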