def test_maxtasksperchild(shutdown_only):
    """With maxtasksperchild=1 every task must run in a fresh worker process.

    Mapping 20 tasks should therefore observe 20 distinct worker PIDs.
    """

    def report_pid(_):
        return os.getpid()

    pool = Pool(5, maxtasksperchild=1)
    observed_pids = set(pool.map(report_pid, range(20)))
    assert len(observed_pids) == 20
    pool.terminate()
    pool.join()
def launch_long_running_tasks(num_pool=5):
    """Run `task` over a fixed input range on an actor pool and collect results.

    Args:
        num_pool: Number of pool workers (actors) to start.

    Returns:
        List of `task` results, one per input, in input order.
    """
    # create an Actor pool of num_pool workers nodes
    pool = Pool(num_pool)
    # pool.map already yields results in input order — no manual append loop
    # is needed (the original built the list one append at a time).
    results = list(pool.map(task, range(1, 500, 10)))
    pool.terminate()
    return results
def test_initializer(shutdown_only):
    """The initializer must run exactly once in each worker process.

    Each worker's initializer writes one file named after its PID, so the
    number of files equals the number of distinct initialized workers.
    """

    def init(dirname):
        with open(os.path.join(dirname, str(os.getpid())), "w") as f:
            print("hello", file=f)

    with tempfile.TemporaryDirectory() as dirname:
        num_processes = 4
        pool = Pool(processes=num_processes, initializer=init, initargs=(dirname,))
        # Assert against num_processes instead of a duplicated literal `4` so
        # the check stays correct if the worker count above is ever changed.
        assert len(os.listdir(dirname)) == num_processes
        pool.terminate()
        pool.join()
def test_ray_init(monkeypatch, shutdown_only):
    """Pool creation should start Ray when needed and respect a running cluster."""

    def worker_pid(_):
        return os.getpid()

    def assert_pool_size(pool, expected):
        dummy_args = [() for _ in range(expected)]
        assert len(set(pool.map(worker_pid, dummy_args))) == expected

    # Creating a pool with no cluster running should initialize Ray itself.
    pool = Pool(processes=2)
    assert ray.is_initialized()
    assert int(ray.cluster_resources()["CPU"]) == 2
    assert_pool_size(pool, 2)
    pool.terminate()
    pool.join()
    ray.shutdown()

    # Set up the cluster id so that gcs is talking with a different
    # storage prefix.
    monkeypatch.setenv("RAY_external_storage_namespace", "new_cluster")
    ray._raylet.Config.initialize("")

    # With a local cluster already up, starting a pool must not change
    # the cluster's resources.
    ray.init(num_cpus=3)
    assert ray.is_initialized()
    pool = Pool(processes=2)
    assert int(ray.cluster_resources()["CPU"]) == 3
    assert_pool_size(pool, 2)
    pool.terminate()
    pool.join()
    ray.shutdown()

    # Requesting more processes than the existing cluster has CPUs must raise.
    ray.init(num_cpus=1)
    assert ray.is_initialized()
    with pytest.raises(ValueError):
        Pool(processes=2)
    assert int(ray.cluster_resources()["CPU"]) == 1
    ray.shutdown()
def test_connect_to_ray(monkeypatch, ray_start_cluster):
    """Pools should connect to an existing cluster when given an address.

    Distinguishes "started a local cluster" from "connected to the fixture
    cluster" by giving the two different CPU counts.
    """

    def pid_of(_):
        return os.getpid()

    def expect_workers(pool, count):
        empty_args = [() for _ in range(count)]
        assert len(set(pool.map(pid_of, empty_args))) == count

    address = ray_start_cluster.address
    start_cpus = 1  # Set in fixture.
    init_cpus = 2

    # Set up the cluster id so that gcs is talking with a different
    # storage prefix.
    monkeypatch.setenv("RAY_external_storage_namespace", "new_cluster")
    ray._raylet.Config.initialize("")

    # Without RAY_ADDRESS, a pool still boots its own local Ray instance.
    pool = Pool(processes=init_cpus)
    assert ray.is_initialized()
    assert int(ray.cluster_resources()["CPU"]) == init_cpus
    expect_workers(pool, init_cpus)
    pool.terminate()
    pool.join()
    ray.shutdown()

    # Passing ray_address explicitly should connect to the fixture cluster.
    pool = Pool(ray_address=address)
    assert ray.is_initialized()
    assert int(ray.cluster_resources()["CPU"]) == start_cpus
    expect_workers(pool, start_cpus)
    pool.terminate()
    pool.join()
    ray.shutdown()

    monkeypatch.setenv("RAY_external_storage_namespace", "new_cluster2")
    ray._raylet.Config.initialize("")

    # With RAY_ADDRESS set in the environment, a bare Pool() should also
    # connect to the running fixture cluster instead of starting a new one.
    os.environ["RAY_ADDRESS"] = address
    pool = Pool()
    assert ray.is_initialized()
    assert int(ray.cluster_resources()["CPU"]) == start_cpus
    expect_workers(pool, start_cpus)
    pool.terminate()
    pool.join()
    ray.shutdown()

    # Requesting more processes than the cluster has CPUs must raise.
    with pytest.raises(Exception):
        Pool(processes=start_cpus + 1)
    assert int(ray.cluster_resources()["CPU"]) == start_cpus
    ray.shutdown()
def test_ray_init(shutdown_only):
    """Check Pool/Ray lifecycle interactions when Ray is (not) already running."""
    # NOTE(review): another test with this exact name exists in this file; under
    # pytest one definition shadows the other — confirm and rename one of them.

    def pid_of_worker(_args):
        return os.getpid()

    def verify_distinct_workers(pool, expected_count):
        payload = [tuple()] * expected_count
        pids = pool.map(pid_of_worker, payload)
        assert len(set(pids)) == expected_count

    # A pool created with no cluster running should bootstrap Ray on its own.
    pool = Pool(processes=2)
    assert ray.is_initialized()
    assert int(ray.cluster_resources()["CPU"]) == 2
    verify_distinct_workers(pool, 2)
    pool.terminate()
    pool.join()
    ray.shutdown()

    # A pool created on top of a live local cluster must leave its
    # resources untouched.
    ray.init(num_cpus=3)
    assert ray.is_initialized()
    pool = Pool(processes=2)
    assert int(ray.cluster_resources()["CPU"]) == 3
    verify_distinct_workers(pool, 2)
    pool.terminate()
    pool.join()
    ray.shutdown()

    # Asking for more pool processes than the cluster has CPUs must raise.
    ray.init(num_cpus=1)
    assert ray.is_initialized()
    with pytest.raises(ValueError):
        Pool(processes=2)
    assert int(ray.cluster_resources()["CPU"]) == 1
    ray.shutdown()
def pool_4_processes():
    """Yield a 4-worker Pool; tear down the pool and Ray afterwards."""
    four_worker_pool = Pool(processes=4)
    yield four_worker_pool
    four_worker_pool.terminate()
    four_worker_pool.join()
    ray.shutdown()
def pool():
    """Yield a single-worker Pool; tear down the pool and Ray afterwards."""
    p = Pool(processes=1)
    yield p
    p.terminate()
    # Wait for the workers to actually exit before shutting Ray down —
    # terminate() alone does not wait. This matches the teardown of the
    # sibling pool_4_processes fixture, which the original omitted here.
    p.join()
    ray.shutdown()
# Let's try a multiprocessing pool with one worker per core.
# Since this is a CPU-bound task, we should get better performance than
# the serial and threading versions.
start = time.time()
mp_pool = mp.Pool(get_cpu_count())
with mp_pool as p:
    prime_numbers = p.map(is_prime, list(range(num)))
end = time.time()
# Note: the `with` block terminates the pool on exit, so the original's extra
# mp_pool.terminate() after it was a redundant no-op and has been dropped.
print(
    f"Multi Process access: Time elapsed: {end - start:4.2f} sec to compute all primes in {num} are {sum(prime_numbers)}"
)

# Now the same workload with Ray's multiprocessing-compatible pool.
ray.init()
ray_pool = Pool(get_cpu_count())
candidates = list(range(num))
start = time.time()
# pool.map returns the results directly — no manual append loop needed.
results = list(ray_pool.map(is_prime, candidates))
end = time.time()
ray_pool.terminate()
print(
    f"Ray Distributed Multi Process access: Time elapsed: {end - start:4.2f} sec to compute all primes in {num} are {sum(results)}"
)
ray.shutdown()