def make_global_state_accessor(address_info):
    """Create a connected ``GlobalStateAccessor`` for a running cluster.

    Args:
        address_info: Mapping describing the cluster. ``"gcs_address"`` is
            read when ``gcs_utils.use_gcs_for_bootstrap()`` is true,
            otherwise ``"redis_address"`` is read.

    Returns:
        A ``GlobalStateAccessor`` on which ``connect()`` has already been
        called.
    """
    if gcs_utils.use_gcs_for_bootstrap():
        client_options = GcsClientOptions.from_gcs_address(
            address_info["gcs_address"]
        )
    else:
        # Redis bootstrap: connect with the default redis password.
        client_options = GcsClientOptions.from_redis_address(
            address_info["redis_address"],
            ray.ray_constants.REDIS_DEFAULT_PASSWORD,
        )

    accessor = GlobalStateAccessor(client_options)
    accessor.connect()
    return accessor
def assert_no_thrashing(address):
    """Assert that restored bytes do not exceed consumed bytes.

    Initializes a ``GlobalState`` against ``address``, fetches the
    stats-only memory summary and scans it line by line:

    * a line containing ``"Restored"`` supplies the restored amount (its
      second space-separated token);
    * a line containing ``"consumed"`` supplies the consumed amount (its
      second-to-last space-separated token).

    Either amount stays 0 when no matching line is present.

    Args:
        address: GCS address of the cluster to inspect.

    Raises:
        AssertionError: If the consumed amount is smaller than the restored
            amount.
    """
    global_state = ray._private.state.GlobalState()
    gcs_options = GcsClientOptions.from_gcs_address(address)
    global_state._initialize_global_state(gcs_options)

    stats_text = memory_summary(address=address, stats_only=True)

    restored_bytes = 0
    consumed_bytes = 0
    for row in stats_text.split("\n"):
        tokens = row.split(" ")
        if "Restored" in row:
            restored_bytes = int(tokens[1])
        # Deliberately not `elif`: a single row may match both markers.
        if "consumed" in row:
            consumed_bytes = int(tokens[-2])

    assert (
        consumed_bytes >= restored_bytes
    ), f"consumed: {consumed_bytes}, restored: {restored_bytes}"
def assert_no_thrashing(address):
    """Assert that restored bytes do not exceed consumed bytes.

    Initializes a ``GlobalState`` against ``address`` (through GCS when
    ``use_gcs_for_bootstrap()`` is true, otherwise through Redis with the
    default password), fetches the stats-only memory summary and scans it
    line by line:

    * a line containing ``"Restored"`` supplies the restored amount (its
      second space-separated token);
    * a line containing ``"consumed"`` supplies the consumed amount (its
      second-to-last space-separated token).

    Either amount stays 0 when no matching line is present.

    Args:
        address: Bootstrap address of the cluster to inspect.

    Raises:
        AssertionError: If the consumed amount is smaller than the restored
            amount.
    """
    global_state = ray.state.GlobalState()
    if use_gcs_for_bootstrap():
        client_options = GcsClientOptions.from_gcs_address(address)
    else:
        client_options = GcsClientOptions.from_redis_address(
            address,
            ray.ray_constants.REDIS_DEFAULT_PASSWORD,
        )
    global_state._initialize_global_state(client_options)

    stats_text = memory_summary(address=address, stats_only=True)

    restored_bytes = 0
    consumed_bytes = 0
    for row in stats_text.split("\n"):
        tokens = row.split(" ")
        if "Restored" in row:
            restored_bytes = int(tokens[1])
        # Deliberately not `elif`: a single row may match both markers.
        if "consumed" in row:
            consumed_bytes = int(tokens[-2])

    assert (
        consumed_bytes >= restored_bytes
    ), f"consumed: {consumed_bytes}, restored: {restored_bytes}"
def memory_summary(
    address=None,
    redis_password=ray_constants.REDIS_DEFAULT_PASSWORD,
    group_by="NODE_ADDRESS",
    sort_by="OBJECT_SIZE",
    units="B",
    line_wrap=True,
    stats_only=False,
    num_entries=None,
):
    """Return a memory summary for the cluster at ``address``.

    Args:
        address: Bootstrap address; passed through
            ``services.canonicalize_bootstrap_address`` before use.
        redis_password: Not read by this function body.
        group_by: Forwarded to the dashboard ``memory_summary`` helper.
        sort_by: Forwarded to the dashboard ``memory_summary`` helper.
        units: Forwarded to the dashboard ``memory_summary`` helper.
        line_wrap: Forwarded to the dashboard ``memory_summary`` helper.
        stats_only: When true, return only ``get_store_stats(state)``.
        num_entries: Forwarded to the dashboard ``memory_summary`` helper.

    Returns:
        ``get_store_stats(state)`` alone when ``stats_only`` is true,
        otherwise the dashboard summary with the store stats appended
        via ``+``.
    """
    # Imported lazily and aliased so it does not shadow this function's name.
    from ray.dashboard.memory_utils import (
        memory_summary as dashboard_memory_summary,
    )

    gcs_address = services.canonicalize_bootstrap_address(address)
    state = GlobalState()
    state._initialize_global_state(
        GcsClientOptions.from_gcs_address(gcs_address)
    )

    if stats_only:
        return get_store_stats(state)

    object_table = dashboard_memory_summary(
        state, group_by, sort_by, line_wrap, units, num_entries
    )
    return object_table + get_store_stats(state)
def make_global_state_accessor(ray_context):
    """Create a connected ``GlobalStateAccessor`` from a Ray context.

    Args:
        ray_context: Object whose ``address_info["gcs_address"]`` holds the
            GCS address to connect to.

    Returns:
        A ``GlobalStateAccessor`` on which ``connect()`` has already been
        called.
    """
    gcs_address = ray_context.address_info["gcs_address"]
    accessor = GlobalStateAccessor(
        GcsClientOptions.from_gcs_address(gcs_address)
    )
    accessor.connect()
    return accessor
def add_node(self, wait=True, **node_args):
    """Start a new node and add it to the local Ray Cluster.

    The first node added becomes the head node; every later node is
    started as a non-head node and recorded in ``self.worker_nodes``.

    Settings not supplied through ``node_args`` fall back to:

        num_cpus=1,
        num_gpus=0,
        object_store_memory=150 * 1024 * 1024  # 150 MiB
        min_worker_port=0,
        max_worker_port=0,
        dashboard_port=None

    Args:
        wait (bool): Whether to call ``self._wait_for_node`` on the new
            node before returning.
        node_args: Keyword arguments forwarded to ``RayParams``. They
            take precedence over the fallback settings above.

    Returns:
        Node object of the added Ray node.
    """
    ray_params = ray._private.parameter.RayParams(**node_args)
    ray_params.update_if_absent(
        num_cpus=1,
        num_gpus=0,
        object_store_memory=150 * 1024 * 1024,  # 150 MiB
        min_worker_port=0,
        max_worker_port=0,
        dashboard_port=None,
    )

    with disable_client_hook():
        starting_head = self.head_node is None

        if not starting_head:
            # Point the new node at the cluster started by the head node.
            ray_params.update_if_absent(redis_address=self.redis_address)
            ray_params.update_if_absent(gcs_address=self.gcs_address)
            # We only need one log monitor per physical node.
            ray_params.update_if_absent(include_log_monitor=False)
            # Let grpc pick a port.
            ray_params.update_if_absent(node_manager_port=0)

        node = ray.node.Node(
            ray_params,
            head=starting_head,
            shutdown_at_exit=self._shutdown_at_exit,
            spawn_reaper=self._shutdown_at_exit,
        )

        if starting_head:
            self.head_node = node
            self.redis_address = self.head_node.redis_address
            self.redis_password = node_args.get(
                "redis_password", ray_constants.REDIS_DEFAULT_PASSWORD
            )
            self.webui_url = self.head_node.webui_url
            # Init global state accessor when creating head node.
            self.global_state._initialize_global_state(
                GcsClientOptions.from_gcs_address(node.gcs_address)
            )
        else:
            self.worker_nodes.add(node)

        if wait:
            # Wait for the node to appear in the client table. We do this
            # so that the nodes appear in the client table in the order
            # that the corresponding calls to add_node were made. We do
            # this because in the tests we assume that the driver is
            # connected to the first node that is added.
            self._wait_for_node(node)

    return node