def create_client_selector(hostports):
    clients = []
    for hostport in hostports:
        client = postman.Client(hostport)
        client.connect(3)
        clients.append(client)
    return iter(itertools.cycle(clients))

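# Hedged usage sketch (not from the original source; the hostports are
# made-up examples): the selector round-robins over the connected clients,
# so each next() call hands back the next client in the cycle.
selector = create_client_selector(["localhost:12345", "localhost:12346"])
for _ in range(4):
    client = next(selector)  # alternates between the two connected clients
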
def run_client():
    client = postman.Client(address)
    client.connect(10)
    client.py_function(
        torch.zeros((1, 2)),
        torch.arange(10),
        (torch.empty(2, 3), torch.ones((1, 2))),
    )
    client.batched_function(torch.zeros((1, 2)))

def test_none_return(self):
    def get_nothing():
        # TODO(heiner): Add check on return shape.
        return torch.arange(2).reshape(1, 2)

    def return_nothing(t):
        return None

    def nothing():
        return

    server = postman.Server("127.0.0.1:0")
    server.bind("get_nothing", get_nothing, batch_size=1)
    server.bind("return_nothing", return_nothing, batch_size=1)
    server.bind("nothing", nothing, batch_size=1)
    server.run()

    client = postman.Client("127.0.0.1:%i" % server.port())
    client.connect(10)

    try:
        value = client.get_nothing()
        np.testing.assert_array_equal(value, np.arange(2))

        value = client.return_nothing(torch.tensor(10))
        # For now, "None" responses are empty tuples.
        self.assertEqual(value, ())
        self.assertEqual(client.nothing(), ())
    finally:
        server.stop()

def run_client(client_id):
    client = postman.Client(address)
    client.connect(10)

    arg = np.full((1, 2), client_id, dtype=np.float32)
    batched_arg = np.full((2,), client_id, dtype=np.float32)

    function_result = client.function(arg)
    batched_function_result = client.batched_function(batched_arg)

    np.testing.assert_array_equal(function_result, np.full((1, 2), client_id))
    np.testing.assert_array_equal(batched_function_result, np.full((2,), client_id))

def run_client():
    client = postman.Client(address)
    client.connect(10)

    local_replay_buffer = buffer.NestPrioritizedReplay(1000, 0, 0.6, 0.4, True)
    data = {}
    data["a"] = torch.Tensor(10)
    # For testing; in a real setup a long-running C++ replay buffer would add this.
    local_replay_buffer.add_one(data, 1)
    local_replay_buffer.add_one(data, 2)
    size, batch, priority = local_replay_buffer.get_new_content()
    client.add_replay(batch, priority)

def main():
    client_id = random.randint(0, 10000)
    print("Client with random id", client_id)
    client = postman.Client("localhost:12345")
    client.connect(deadline_sec=3)

    output = client.pyfunc(torch.zeros(1, 2))

    client_array = torch.tensor([0, client_id, 2 * client_id])
    inputs = (torch.tensor(0), torch.tensor(1), (client_array, torch.tensor(True)))
    client.identity(inputs)

    # Test that we get back what we expect.
    np.testing.assert_array_equal(client_array, client.identity(client_array))
    np.testing.assert_array_equal(client_array, client.batched_identity(client_array))

def main():
    client = postman.Client("%s:%d" % ("localhost", 12345))
    client.connect(deadline_sec=10)

    local_replay_buffer = buffer.NestPrioritizedReplay(1000, 0, 0.6, 0.4, True)
    data = {}
    data["a"] = torch.Tensor(10)
    # For testing; in a real setup a long-running C++ replay buffer would add this.
    local_replay_buffer.add_one(data, 1)
    local_replay_buffer.add_one(data, 2)
    time.sleep(1)
    size, batch, priority = local_replay_buffer.get_new_content()
    print(batch)
    client.add_replay(batch, priority)

    model = client.query_state_dict()
    print(model)

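# Hedged server-side sketch (an assumption, not code from the original
# source): the clients above require a server that binds "add_replay" and
# "query_state_dict". The bind/run calls mirror the test server above; the
# handler bodies and the `net` model are illustrative placeholders.
def start_parameter_server():
    net = torch.nn.Linear(10, 10)  # placeholder model

    def add_replay(batch, priority):
        # A real server would feed these into its own replay buffer.
        return torch.tensor(0)  # acknowledgement

    def query_state_dict():
        return net.state_dict()

    server = postman.Server("localhost:12345")
    server.bind("add_replay", add_replay, batch_size=1)
    server.bind("query_state_dict", query_state_dict, batch_size=1)
    server.run()
    return server
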
def __init__(
    self,
    *,
    model_path,
    max_batch_size,
    max_rollout_length=3,
    rollout_temperature,
    rollout_top_p=1.0,
    n_server_procs=1,
    n_gpu=1,
    n_rollout_procs=70,
    use_predicted_final_scores=True,
    postman_wait_till_full=False,
    use_server_addr=None,
    use_value_server_addr=None,
    device=None,
    mix_square_ratio_scoring=0,
    value_model_path=None,
    rollout_value_frac=0,
):
    super().__init__()
    self.n_rollout_procs = n_rollout_procs
    self.n_server_procs = n_server_procs
    self.use_predicted_final_scores = use_predicted_final_scores
    self.rollout_temperature = rollout_temperature
    self.rollout_top_p = rollout_top_p
    self.max_batch_size = max_batch_size
    self.max_rollout_length = max_rollout_length
    self.mix_square_ratio_scoring = mix_square_ratio_scoring
    self.rollout_value_frac = rollout_value_frac

    device = int(device.lstrip("cuda:")) if type(device) == str else device

    logging.info("Launching servers")
    assert n_gpu <= n_server_procs and n_server_procs % n_gpu == 0
    try:
        mp.set_start_method("spawn")
    except RuntimeError:
        logging.warning("Failed mp.set_start_method")

    if use_server_addr is not None:
        assert value_model_path is None, "Not implemented"
        if n_server_procs != 1:
            raise ValueError(
                f"Bad args use_server_addr={use_server_addr} n_server_procs={n_server_procs}"
            )
        self.hostports = [use_server_addr]
        self.value_hostport = use_value_server_addr
    else:
        _servers, _qs, self.hostports = zip(
            *[
                make_server_process(
                    model_path=model_path,
                    device=i % n_gpu if device is None else device,
                    max_batch_size=max_batch_size,
                    wait_till_full=postman_wait_till_full,
                    # If torch's seed is set, we want the server seed to be a
                    # deterministic function of it, yet not the same for each
                    # agent. So pick a random number from the torch rng.
                    seed=int(torch.randint(1000000000, (1,))),
                )
                for i in range(n_server_procs)
            ]
        )

        if value_model_path is not None:
            _, _, self.value_hostport = make_server_process(
                model_path=value_model_path,
                device=n_server_procs % n_gpu if device is None else device,
                max_batch_size=max_batch_size,
                wait_till_full=postman_wait_till_full,
                seed=int(torch.randint(1000000000, (1,))),
            )
        else:
            self.value_hostport = None

    self.client = postman.Client(self.hostports[0])
    logging.info(f"Connecting to {self.hostports[0]} [{os.uname().nodename}]")
    self.client.connect(20)
    logging.info(f"Connected to {self.hostports[0]}")

    if n_rollout_procs > 0:
        self.proc_pool = mp.Pool(n_rollout_procs)
        logging.info("Warming up pool")
        self.proc_pool.map(float, range(n_rollout_procs))
        logging.info("Done warming up pool")
    else:
        logging.info("Debug mode: using fake process pool")
        fake_pool_class = type(
            "FakePool", (), {"map": lambda self, *a, **k: map(*a, **k)}
        )
        self.proc_pool = fake_pool_class()

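# Hedged construction sketch: the class name SearchBotAgent is a placeholder
# guess, and the argument values are illustrative; the keyword names come
# from the __init__ signature above (model_path, max_batch_size, and
# rollout_temperature have no defaults and must be supplied).
agent = SearchBotAgent(
    model_path="/path/to/model.pt",  # placeholder path
    max_batch_size=700,
    rollout_temperature=1.0,
    rollout_top_p=0.9,
    n_server_procs=2,
    n_gpu=2,
    n_rollout_procs=70,
)
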
def do_rollout(
    cls,
    *,
    game_json,
    hostport,
    set_orders_dict={},
    temperature,
    top_p,
    max_rollout_length,
    batch_size=1,
    use_predicted_final_scores,
    mix_square_ratio_scoring=0,
    value_hostport=None,
    rollout_value_frac=0,
) -> Tuple[Tuple[Dict, List[Dict]], TimingCtx]:
    """Complete game, optionally setting orders for the current turn

    This method can safely be called in a subprocess

    Arguments:
    - game_json: json-formatted game string, e.g. output of to_saved_game_format(game)
    - hostport: string, "{host}:{port}" of model server
    - set_orders_dict: Dict[power, orders] to set for the current turn
    - temperature: model softmax temperature for rollout policy
    - top_p: probability mass to sample from for rollout policy
    - max_rollout_length: return SC count after at most this many move phases
    - batch_size: number of games to roll out in parallel
    - use_predicted_final_scores: if True, use model's value head for final SC predictions

    Returns a 2-tuple:
    - results, a 2-tuple:
      - set_orders_dict: Dict[power, orders]
      - list of Dict[power, final_score], len=batch_size
    - timings: a TimingCtx
    """
    timings = TimingCtx()

    with timings("postman.client"):
        client = postman.Client(hostport)
        client.connect(3)
        if value_hostport is not None:
            value_client = postman.Client(value_hostport)
            value_client.connect(3)
        else:
            value_client = client

    with timings("setup"):
        faulthandler.register(signal.SIGUSR2)
        torch.set_num_threads(1)

        games = [pydipcc.Game.from_json(game_json) for _ in range(batch_size)]
        for i in range(len(games)):
            games[i].game_id += f"_{i}"

        est_final_scores = {}  # game id -> np.array len=7

        # set orders if specified
        for power, orders in set_orders_dict.items():
            for game in games:
                game.set_orders(power, list(orders))

        other_powers = [p for p in POWERS if p not in set_orders_dict]

    rollout_start_phase = games[0].current_short_phase
    rollout_end_phase = n_move_phases_later(rollout_start_phase, max_rollout_length)
    while True:
        if max_rollout_length == 0:
            # Handled separately.
            break

        # exit loop if all games are done before max_rollout_length
        ongoing_game_phases = [
            game.current_short_phase for game in games if not game.is_game_done
        ]
        if len(ongoing_game_phases) == 0:
            break

        # step games together at the pace of the slowest game, e.g. process
        # games with retreat phases alone before moving on to the next move phase
        min_phase = min(ongoing_game_phases, key=sort_phase_key)

        batch_data = []
        for game in games:
            if not game.is_game_done and game.current_short_phase == min_phase:
                with timings("encode.all_poss_orders"):
                    all_possible_orders = game.get_all_possible_orders()
                with timings("encode.inputs"):
                    inputs = FeatureEncoder().encode_inputs([game])
                batch_data.append((game, inputs))

        with timings("cat_pad"):
            xs: List[Tuple] = [b[1] for b in batch_data]
            batch_inputs = cls.cat_pad_inputs(xs)

        with timings("model"):
            if client != value_client:
                assert (
                    rollout_value_frac == 0
                ), "If separate value model, you can't add in value each step (slow)"

            cur_client = value_client if min_phase == rollout_end_phase else client

            batch_orders, _, batch_est_final_scores = cls.do_model_request(
                batch_inputs, temperature, top_p, client=cur_client
            )

        if min_phase == rollout_end_phase:
            with timings("score.accumulate"):
                for game_idx, (game, _) in enumerate(batch_data):
                    est_final_scores[game.game_id] = np.array(
                        batch_est_final_scores[game_idx]
                    )
            # skip env step and exit loop once we've accumulated the estimated
            # scores for all games up to max_rollout_length
            break

        with timings("env"):
            assert len(batch_data) == len(batch_orders), "{} != {}".format(
                len(batch_data), len(batch_orders)
            )

            # set_orders and process
            for (game, _), power_orders in zip(batch_data, batch_orders):
                if game.is_game_done:
                    continue
                power_orders = dict(zip(POWERS, power_orders))
                for other_power in other_powers:
                    game.set_orders(other_power, list(power_orders[other_power]))

                assert game.current_short_phase == min_phase
                game.process()

        for (game, _) in batch_data:
            if game.is_game_done:
                with timings("score.gameover"):
                    final_scores = np.array(get_square_scores_from_game(game))
                    est_final_scores[game.game_id] = final_scores

        other_powers = POWERS  # no set orders on subsequent turns

    # out of rollout loop

    if max_rollout_length > 0:
        assert len(est_final_scores) == len(games)
    else:
        assert (
            not other_powers
        ), "If max_rollout_length=0 it's assumed that all orders are pre-defined."
        # All orders are set. Step env. Now only need to get values.
        for game in games:
            game.process()

        batch_data = []
        for game in games:
            if not game.is_game_done:
                with timings("encode.inputs"):
                    inputs = FeatureEncoder().encode_inputs([game])
                batch_data.append((game, inputs))
            else:
                est_final_scores[game.game_id] = get_square_scores_from_game(game)

        if batch_data:
            with timings("cat_pad"):
                xs: List[Tuple] = [b[1] for b in batch_data]
                batch_inputs = cls.cat_pad_inputs(xs)

            with timings("model"):
                _, _, batch_est_final_scores = cls.do_model_request(
                    batch_inputs, temperature, top_p, client=value_client
                )
                assert batch_est_final_scores.shape[0] == len(batch_data)
                assert batch_est_final_scores.shape[1] == len(POWERS)
                for game_idx, (game, _) in enumerate(batch_data):
                    est_final_scores[game.game_id] = batch_est_final_scores[game_idx]

    with timings("final_scores"):
        # get GameScores objects for current game state
        current_game_scores = [
            {
                p: compute_game_scores_from_state(i, game.get_state())
                for i, p in enumerate(POWERS)
            }
            for game in games
        ]

        # get estimated or current sum of squares scoring
        final_game_scores = [
            dict(zip(POWERS, est_final_scores[game.game_id]))
            for game, current_scores in zip(games, current_game_scores)
        ]

        # mix in current sum of squares ratio to encourage losing powers to try hard
        if mix_square_ratio_scoring > 0:
            for game, final_scores, current_scores in zip(
                games, final_game_scores, current_game_scores
            ):
                for p in POWERS:
                    final_scores[p] = (1 - mix_square_ratio_scoring) * final_scores[
                        p
                    ] + (mix_square_ratio_scoring * current_scores[p].square_ratio)

    result = (set_orders_dict, final_game_scores)
    return result, timings

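# Hedged usage sketch (assumptions: AgentClass stands in for the class that
# defines do_rollout, and the game/order values are placeholders). Per the
# docstring, game_json can come from to_saved_game_format(game), and the
# method is safe to call in a subprocess (e.g. via the pool from __init__).
(orders_dict, final_scores), timings = AgentClass.do_rollout(
    game_json=to_saved_game_format(game),
    hostport=agent.hostports[0],
    set_orders_dict={"FRANCE": ("A PAR - BUR",)},
    temperature=1.0,
    top_p=0.9,
    max_rollout_length=3,
    batch_size=8,
    use_predicted_final_scores=True,
)
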
def run_client():
    client = postman.Client(address)
    client.connect(10)
    model = client.query_state_dict()
    self.assertEqual(model["fc.weight"].size, 100)
    self.assertEqual(model["fc.bias"].size, 10)

def run_client(port):
    client = postman.Client("%s:%i" % (address, port))
    client.connect(10)
    client.foo(torch.Tensor(init_batch_size, 2, 2))
    client.foo(torch.Tensor(final_batch_size, 2, 2))
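
# Hedged server-side sketch (an assumption, not from the original source):
# a matching server for run_client above, binding "foo" to an identity
# handler. The two calls with init_batch_size and final_batch_size suggest
# the test exercises changing client-side batch shapes against one binding.
def start_foo_server():
    server = postman.Server("%s:0" % address)
    server.bind("foo", lambda t: t, batch_size=1)
    server.run()
    return server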