def get_log_file_handles(self, name, unique=False):
    """Return opened stdout/stderr log files for the component `name`.

    The file names are obtained from `self._get_log_file_names`. When
    output redirection is turned off, nothing is opened and
    `(None, None)` is returned instead.

    Args:
        name (str): descriptive string for this log file.
        unique (bool): forwarded to `self._get_log_file_names`; if true,
            a counter is attached to `name` so that the returned
            filename is not already in use.

    Returns:
        A `(stdout, stderr)` tuple of file handles, or `(None, None)`
        if output redirection is disabled.
    """
    should_redirect = self._ray_params.redirect_output
    if should_redirect is None:
        # No explicit setting: mirror glog, which writes to stderr when
        # GLOG_logtostderr is "1".
        should_redirect = os.getenv("GLOG_logtostderr") != "1"

    if should_redirect:
        out_path, err_path = self._get_log_file_names(name, unique=unique)
        return open_log(out_path), open_log(err_path)
    return None, None
def start_dashboard(self, require_dashboard):
    """Launch the dashboard process and publish its URL.

    The dashboard's stdout/stderr are sent to freshly opened
    "dashboard" log files. If a process was actually started it is
    recorded in `self.all_processes` and its URL is written to the
    "webui" hash in Redis.

    Args:
        require_dashboard (bool): forwarded to
            `ray.services.start_dashboard`. If true, failing to start
            the dashboard raises an exception; otherwise a warning is
            printed.
    """
    out_path, err_path = self.get_log_file_names("dashboard", unique=True)
    out_log = open_log(out_path)
    err_log = open_log(err_path)

    params = self._ray_params
    dashboard_url, dashboard_proc = ray.services.start_dashboard(
        require_dashboard,
        params.dashboard_host,
        self.redis_address,
        self._temp_dir,
        stdout_file=out_log,
        stderr_file=err_log,
        redis_password=params.redis_password,
        fate_share=self.kernel_fate_share,
        port=params.dashboard_port)
    self._webui_url = dashboard_url

    # The dashboard must not have been registered before.
    assert ray_constants.PROCESS_TYPE_DASHBOARD not in self.all_processes
    if dashboard_proc is None:
        # Nothing was started, so there is nothing to track or publish.
        return

    self.all_processes[ray_constants.PROCESS_TYPE_DASHBOARD] = [
        dashboard_proc
    ]
    client = self.create_redis_client()
    client.hmset("webui", {"url": self._webui_url})
def start_redis(self):
    """Launch the primary Redis server and its shards.

    One (stdout, stderr) pair of log files is opened for the primary
    server and one for each of the `num_redis_shards` shards. The
    address returned by `ray.services.start_redis` is stored in
    `self._redis_address` and the process infos are recorded in
    `self.all_processes`.
    """
    # Redis may only be started once per node.
    assert self._redis_address is None

    def open_log_pair(label):
        # Resolve unique file names for `label`, then open both logs.
        out_path, err_path = self.get_log_file_names(label, unique=True)
        return open_log(out_path), open_log(err_path)

    params = self._ray_params
    # Primary server first, followed by one entry per shard.
    log_files = [open_log_pair("redis")] + [
        open_log_pair("redis-shard_{}".format(shard_index))
        for shard_index in range(params.num_redis_shards)
    ]

    address, _shards, started = ray.services.start_redis(
        self._node_ip_address,
        log_files,
        self.get_resource_spec(),
        port=params.redis_port,
        redis_shard_ports=params.redis_shard_ports,
        num_redis_shards=params.num_redis_shards,
        redis_max_clients=params.redis_max_clients,
        redirect_worker_output=True,
        password=params.redis_password,
        include_java=params.include_java,
        fate_share=self.kernel_fate_share)
    self._redis_address = address

    process_key = ray_constants.PROCESS_TYPE_REDIS_SERVER
    assert process_key not in self.all_processes
    self.all_processes[process_key] = started
def get_logs() -> Tuple[IO, IO]:
    """Open the error and output log files inside `LOG_DIR`.

    `LOG_DIR` is created first if it does not exist yet. An already
    existing directory is fine.

    Returns:
        A tuple `(err_log, out_log)`: the handle for `ERR_NAME` first,
        then the handle for `OUT_NAME`, both opened with `open_log`.

    Raises:
        OSError: if `LOG_DIR` cannot be created, or if opening either
            log file fails.
    """
    # An empty LOG_DIR makes os.path.join yield paths relative to the
    # current directory, so there is nothing to create in that case.
    if LOG_DIR:
        # exist_ok tolerates only "directory already there"; real
        # failures (e.g. permissions) surface here instead of being
        # silently swallowed and reappearing as a confusing open error.
        os.makedirs(LOG_DIR, exist_ok=True)

    err_path = os.path.join(LOG_DIR, ERR_NAME)
    out_path = os.path.join(LOG_DIR, OUT_NAME)

    err_log = open_log(err_path)
    try:
        out_log = open_log(out_path)
    except BaseException:
        # Do not leak the first handle when the second open fails.
        err_log.close()
        raise
    return err_log, out_log
def start_monitor(self):
    """Launch the monitor process.

    Its stdout/stderr go to freshly opened "monitor" log files, and the
    resulting process info is recorded in `self.all_processes`.
    """
    out_path, err_path = self.get_log_file_names("monitor", unique=True)
    out_log = open_log(out_path)
    err_log = open_log(err_path)

    monitor_proc = ray.services.start_monitor(
        self._redis_address,
        stdout_file=out_log,
        stderr_file=err_log,
        autoscaling_config=self._ray_params.autoscaling_config,
        redis_password=self._ray_params.redis_password,
        fate_share=self.kernel_fate_share)

    process_key = ray_constants.PROCESS_TYPE_MONITOR
    # The monitor must not have been registered before.
    assert process_key not in self.all_processes
    self.all_processes[process_key] = [monitor_proc]
def start_reporter(self):
    """Launch the reporter process.

    Its stdout/stderr go to freshly opened "reporter" log files. The
    process is recorded in `self.all_processes` only when
    `ray.services.start_reporter` actually returned a process info.
    """
    out_path, err_path = self.get_log_file_names("reporter", unique=True)
    out_log = open_log(out_path)
    err_log = open_log(err_path)

    reporter_proc = ray.services.start_reporter(
        self.redis_address,
        stdout_file=out_log,
        stderr_file=err_log,
        redis_password=self._ray_params.redis_password,
        fate_share=self.kernel_fate_share)

    process_key = ray_constants.PROCESS_TYPE_REPORTER
    # The reporter must not have been registered before.
    assert process_key not in self.all_processes
    if reporter_proc is None:
        # No process was started, so there is nothing to track.
        return
    self.all_processes[process_key] = [reporter_proc]
def start_log_monitor(self):
    """Launch the log monitor process.

    The log monitor is pointed at `self._logs_dir`. Its own
    stdout/stderr go to freshly opened "log_monitor" log files, and the
    resulting process info is recorded in `self.all_processes`.
    """
    out_path, err_path = self.get_log_file_names(
        "log_monitor", unique=True)
    out_log = open_log(out_path)
    err_log = open_log(err_path)

    log_monitor_proc = ray.services.start_log_monitor(
        self.redis_address,
        self._logs_dir,
        stdout_file=out_log,
        stderr_file=err_log,
        redis_password=self._ray_params.redis_password,
        fate_share=self.kernel_fate_share)

    process_key = ray_constants.PROCESS_TYPE_LOG_MONITOR
    # The log monitor must not have been registered before.
    assert process_key not in self.all_processes
    self.all_processes[process_key] = [log_monitor_proc]
def start_gcs_server(self):
    """Launch the GCS server process.

    Its stdout/stderr go to freshly opened "gcs_server" log files, and
    the resulting process info is recorded in `self.all_processes`.
    """
    out_path, err_path = self.get_log_file_names("gcs_server", unique=True)
    out_log = open_log(out_path)
    err_log = open_log(err_path)

    gcs_proc = ray.services.start_gcs_server(
        self._redis_address,
        stdout_file=out_log,
        stderr_file=err_log,
        redis_password=self._ray_params.redis_password,
        config=self._config,
        fate_share=self.kernel_fate_share)

    process_key = ray_constants.PROCESS_TYPE_GCS_SERVER
    # The GCS server must not have been registered before.
    assert process_key not in self.all_processes
    self.all_processes[process_key] = [gcs_proc]
def start_raylet(self, use_valgrind=False, use_profiler=False):
    """Launch the raylet process.

    Its stdout/stderr go to freshly opened "raylet" log files, and the
    resulting process info is recorded in `self.all_processes`.

    Args:
        use_valgrind (bool): True if we should start the process in
            valgrind.
        use_profiler (bool): True if we should start the process in
            the valgrind profiler.
    """
    out_path, err_path = self.get_log_file_names("raylet", unique=True)
    out_log = open_log(out_path)
    err_log = open_log(err_path)

    params = self._ray_params
    # The positional arguments below must stay in exactly this order.
    raylet_proc = ray.services.start_raylet(
        self._redis_address,
        self._node_ip_address,
        params.node_manager_port,
        self._raylet_socket_name,
        self._plasma_store_socket_name,
        params.worker_path,
        self._temp_dir,
        self._session_dir,
        self.get_resource_spec(),
        params.min_worker_port,
        params.max_worker_port,
        params.object_manager_port,
        params.redis_password,
        params.metrics_agent_port,
        use_valgrind=use_valgrind,
        use_profiler=use_profiler,
        stdout_file=out_log,
        stderr_file=err_log,
        config=self._config,
        include_java=params.include_java,
        java_worker_options=params.java_worker_options,
        load_code_from_local=params.load_code_from_local,
        plasma_directory=params.plasma_directory,
        huge_pages=params.huge_pages,
        fate_share=self.kernel_fate_share,
        socket_to_use=self.socket,
        head_node=self.head)

    process_key = ray_constants.PROCESS_TYPE_RAYLET
    # The raylet must not have been registered before.
    assert process_key not in self.all_processes
    self.all_processes[process_key] = [raylet_proc]
def start_plasma_store(self):
    """Launch the plasma store process.

    Its stdout/stderr go to freshly opened "plasma_store" log files,
    and the resulting process info is recorded in `self.all_processes`.
    """
    out_path, err_path = self.get_log_file_names(
        "plasma_store", unique=True)
    out_log = open_log(out_path)
    err_log = open_log(err_path)

    params = self._ray_params
    plasma_proc = ray.services.start_plasma_store(
        self.get_resource_spec(),
        self._plasma_store_socket_name,
        stdout_file=out_log,
        stderr_file=err_log,
        plasma_directory=params.plasma_directory,
        huge_pages=params.huge_pages,
        # Coerce the (possibly missing) config entry to a strict bool.
        keep_idle=bool(self._config.get("plasma_store_as_thread")),
        fate_share=self.kernel_fate_share)

    process_key = ray_constants.PROCESS_TYPE_PLASMA_STORE
    # The plasma store must not have been registered before.
    assert process_key not in self.all_processes
    self.all_processes[process_key] = [plasma_proc]