def call_ray_start_with_external_redis(request):
    """Start external Redis server(s) and a Ray head node that uses them.

    ``request.param`` (default ``"6379"``) is a comma-separated string of
    ports. One Redis instance is started per port, then ``ray start --head``
    is invoked with the resulting ``localhost:<port>`` address list.

    Yields:
        The first ``localhost:<port>`` address.

    After the yield, the driver is disconnected with ``ray.shutdown()`` and
    the cluster is stopped with ``ray stop``.
    """
    ports = getattr(request, "param", "6379")
    port_list = ports.split(",")

    # The Redis servers and `ray start` must agree on the password, so use a
    # single value for both.
    # NOTE(review): the previous code passed a masked-looking literal to
    # Redis while the CLI used 123 -- confirm 123 is the intended value.
    redis_password = "123"

    # The temp dir lookup does not depend on the port, so do it once.
    temp_dir = ray._private.utils.get_ray_temp_dir()
    for port in port_list:
        _start_redis_instance(
            REDIS_EXECUTABLE, temp_dir, int(port), password=redis_password)

    address_str = ",".join(f"localhost:{port}" for port in port_list)
    # Build argv directly rather than splitting a formatted string on spaces.
    # The exit status is intentionally not checked (subprocess.call), as
    # before.
    subprocess.call([
        "ray",
        "start",
        "--head",
        f"--address={address_str}",
        f"--redis-password={redis_password}",
    ])

    yield f"localhost:{port_list[0]}"

    # Disconnect from the Ray cluster.
    ray.shutdown()
    # Kill the Ray cluster.
    subprocess.check_call(["ray", "stop"])
def _setup_redis(request):
    """Start external Redis instance(s) and publish them in RAY_REDIS_ADDRESS.

    ``request.param`` (default ``{}``) may contain ``"external_redis_ports"``,
    an iterable of ports. When present, the key is removed from the param
    dict; when absent, a single port is chosen by binding a socket to port 0.

    One Redis instance is started per port with
    ``ray_constants.REDIS_DEFAULT_PASSWORD``. The comma-separated
    ``127.0.0.1:<port>`` list is built from the ports returned by
    ``_start_redis_instance`` (the same ports ``wait_for_redis_to_start`` is
    called with) and stored in ``RAY_REDIS_ADDRESS``.

    Yields:
        None. On teardown the previous ``RAY_REDIS_ADDRESS`` value is restored
        (or the variable removed) and every started Redis process is
        terminated, even if setup failed part-way through.
    """
    import os

    # Setup external Redis and env var for initialization.
    param = getattr(request, "param", {})
    external_redis_ports = param.get("external_redis_ports")
    if external_redis_ports is None:
        # Bind to port 0 so the OS hands back a port that is free right now.
        with socket.socket() as s:
            s.bind(("", 0))
            port = s.getsockname()[1]
        external_redis_ports = [port]
    else:
        # Note: this mutates the param dict obtained from the request.
        del param["external_redis_ports"]

    temp_dir = ray._private.utils.get_ray_temp_dir()
    processes = []
    started_ports = []
    env_overridden = False
    old_addr = None
    try:
        for requested_port in external_redis_ports:
            port, proc = _start_redis_instance(
                REDIS_EXECUTABLE,
                temp_dir,
                requested_port,
                password=ray_constants.REDIS_DEFAULT_PASSWORD,
            )
            # Record the process first so it is terminated even if the wait
            # below fails.
            processes.append(proc)
            started_ports.append(port)
            wait_for_redis_to_start("127.0.0.1", port,
                                    ray_constants.REDIS_DEFAULT_PASSWORD)

        # Advertise the ports the instances were actually waited on.
        address_str = ",".join(f"127.0.0.1:{p}" for p in started_ports)
        old_addr = os.environ.get("RAY_REDIS_ADDRESS")
        os.environ["RAY_REDIS_ADDRESS"] = address_str
        env_overridden = True

        yield
    finally:
        if env_overridden:
            if old_addr is not None:
                os.environ["RAY_REDIS_ADDRESS"] = old_addr
            else:
                # pop() tolerates the test having removed the variable itself.
                os.environ.pop("RAY_REDIS_ADDRESS", None)
        for proc in processes:
            proc.process.terminate()
def test_calling_start_ray_head(call_ray_stop_only):
    """Exercise ``ray start --head`` with a range of command line options.

    Each scenario starts a head node through ``check_call_ray`` and stops it
    again. Invalid ``--worker-port-list`` values are expected to raise
    ``subprocess.CalledProcessError``. The final two scenarios cover
    ``--block``: killing the raylet must make the command exit non-zero, and
    terminating the command must clean up its child processes.
    """
    # Test that we can call ray start with various command line
    # parameters.

    # Test starting Ray with a redis port specified.
    check_call_ray(["start", "--head", "--port", "0"])
    check_call_ray(["stop"])

    # Test starting Ray with a node IP address specified.
    check_call_ray(
        ["start", "--head", "--node-ip-address", "127.0.0.1", "--port", "0"])
    check_call_ray(["stop"])

    # Test starting Ray with a system config parameter set.
    check_call_ray([
        "start",
        "--head",
        "--system-config",
        '{"metrics_report_interval_ms":100}',
        "--port",
        "0",
    ])
    check_call_ray(["stop"])

    # Test starting Ray with the object manager and node manager ports
    # specified.
    check_call_ray([
        "start",
        "--head",
        "--object-manager-port",
        "22345",
        "--node-manager-port",
        "54321",
        "--port",
        "0",
    ])
    check_call_ray(["stop"])

    # Test starting Ray with the worker port range specified.
    check_call_ray([
        "start",
        "--head",
        "--min-worker-port",
        "51000",
        "--max-worker-port",
        "51050",
        "--port",
        "0",
    ])
    check_call_ray(["stop"])

    # Test starting Ray with a worker port list.
    check_call_ray(["start", "--head", "--worker-port-list", "10002,10003"])
    check_call_ray(["stop"])

    # Test starting Ray with a non-int in the worker port list.
    with pytest.raises(subprocess.CalledProcessError):
        check_call_ray(["start", "--head", "--worker-port-list", "10002,a"])
    check_call_ray(["stop"])

    # Test starting Ray with an invalid port in the worker port list.
    with pytest.raises(subprocess.CalledProcessError):
        check_call_ray(["start", "--head", "--worker-port-list", "100"])
    check_call_ray(["stop"])

    # Test starting Ray with the number of CPUs specified.
    check_call_ray(["start", "--head", "--num-cpus", "2", "--port", "0"])
    check_call_ray(["stop"])

    # Test starting Ray with the number of GPUs specified.
    check_call_ray(["start", "--head", "--num-gpus", "100", "--port", "0"])
    check_call_ray(["stop"])

    # Test starting Ray with redis shard ports specified.
    check_call_ray([
        "start", "--head", "--redis-shard-ports", "6380,6381,6382", "--port",
        "0"
    ])
    check_call_ray(["stop"])

    # Test starting Ray with all arguments specified.
    check_call_ray([
        "start",
        "--head",
        "--redis-shard-ports",
        "6380,6381,6382",
        "--object-manager-port",
        "22345",
        "--num-cpus",
        "2",
        "--num-gpus",
        "0",
        "--resources",
        '{"Custom": 1}',
        "--port",
        "0",
    ])
    check_call_ray(["stop"])

    temp_dir = ray._private.utils.get_ray_temp_dir()

    # Test starting Ray with RAY_REDIS_ADDRESS env.
    _, proc = _start_redis_instance(
        REDIS_EXECUTABLE,
        temp_dir,
        8888,
        password=ray_constants.REDIS_DEFAULT_PASSWORD)
    os.environ["RAY_REDIS_ADDRESS"] = "127.0.0.1:8888"
    try:
        check_call_ray(["start", "--head"])
        check_call_ray(["stop"])
    finally:
        # Always clean up, so a failure above cannot leak the Redis process
        # or the env var into the remaining scenarios and later tests.
        proc.process.terminate()
        os.environ.pop("RAY_REDIS_ADDRESS", None)

    # Driver script that retries connecting to the running cluster for up to
    # five attempts, one second apart.
    wait_for_cluster_script = """
import ray
from time import sleep
for i in range(0, 5):
    try:
        ray.init(address='auto')
        break
    except:
        sleep(1)
"""

    # Test --block. Killing a child process should cause the command to exit.
    blocked = subprocess.Popen(
        ["ray", "start", "--head", "--block", "--port", "0"])

    blocked.poll()
    assert blocked.returncode is None

    # Make sure ray cluster is up
    run_string_as_driver(wait_for_cluster_script)

    kill_process_by_name("raylet", SIGKILL=True)
    wait_for_children_of_pid_to_exit(blocked.pid, timeout=30)
    blocked.wait()
    assert blocked.returncode != 0, "ray start shouldn't return 0 on bad exit"

    # Test --block. Killing the command should clean up all child processes.
    blocked = subprocess.Popen(
        ["ray", "start", "--head", "--block", "--port", "0"])
    blocked.poll()
    assert blocked.returncode is None

    # Include GCS, autoscaler monitor, client server, dashboard, raylet and
    # log_monitor.py
    num_children = 6
    if not detect_fate_sharing_support():
        # Account for ray_process_reaper.py
        num_children += 1
    # Check a set of child process commands & scripts instead?
    wait_for_children_of_pid(
        blocked.pid, num_children=num_children, timeout=30)

    blocked.terminate()
    wait_for_children_of_pid_to_exit(blocked.pid, timeout=30)
    blocked.wait()
    assert blocked.returncode != 0, "ray start shouldn't return 0 on bad exit"