class llc4320_benchmarks(): """Zarr GCP tests on LLC4320 Datasets """ timer = timeit.default_timer timeout = 3600 repeat = 1 number = 1 warmup_time = 0.0 run_nums = np.arange(1, RUNS + 1) params = (['GCS'], [1], [60, 80, 100, 120, 140, 160], run_nums) #params = (['GCS'], [1], [60], run_nums) #params = getTestConfigValue("gcp_kubernetes_read_zarr.llc4320_benchmarks") param_names = ['backend', 'z_chunksize', 'n_workers', 'run_num'] @bmt.test_gcp def setup(self, backend, z_chunksize, n_workers, run_num): self.cluster = KubeCluster(n_workers=n_workers) self.client = Client(self.cluster) bmt.cluster_wait(self.client, n_workers) self.target = target_zarr.ZarrStore(backend=backend, dask=True) # Open Zarr DS self.ds_zarr = self.target.open_store(DS_STORE) self.ds_zarr_theta = self.ds_zarr.Theta @bmt.test_gcp def time_read(self, backend, z_chunksize, n_workers, run_num): self.ds_zarr_theta.max().load(retries=RETRIES) @bmt.test_gcp def teardown(self, backend, z_chunksize, n_workers, run_num): del self.ds_zarr_theta self.cluster.close()
class llc4320_benchmarks(): """netCDF GCP tests on LLC4320 Datasets """ timer = timeit.default_timer timeout = 3600 repeat = 1 number = 1 warmup_time = 0.0 run_nums = np.arange(1, RUNS + 1) #params = (['FUSE'], [90], [60, 80, 100, 120, 140, 160], run_nums) params = (['FUSE'], [10], [100], run_nums) param_names = ['backend', 'z_chunksize', 'n_workers', 'run_num'] @bmt.test_gcp def setup(self, backend, z_chunksize, n_workers, run_num): self.cluster = KubeCluster(n_workers=n_workers) self.client = Client(self.cluster) bmt.cluster_wait(self.client, n_workers) self.target = target_zarr.ZarrStore(backend=backend, dask=True) # Open netCDF DS self.ds_netcdf = xr.open_mfdataset(DS_FILES, decode_cf=False, autoclose=True, chunks={'Z': z_chunksize}) self.ds_netcdf_theta = self.ds_netcdf.Theta @bmt.test_gcp def time_read(self, backend, z_chunksize, n_workers, run_num): self.ds_netcdf_theta.max().load(retries=RETRIES) @bmt.test_gcp def teardown(self, backend, z_chunksize, n_workers, run_num): del self.ds_netcdf_theta self.cluster.close()
def setup(self, backend, z_chunksize, n_workers, run_num):
    self.cluster = KubeCluster(n_workers=n_workers)
    self.client = Client(self.cluster)
    bmt.cluster_wait(self.client, n_workers)
    self.target = target_zarr.ZarrStore(backend=backend, dask=True)
    # Open Zarr DS
    self.ds_zarr = self.target.open_store(DS_STORE)
    self.ds_zarr_theta = self.ds_zarr.Theta
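The setup methods above block on bmt.cluster_wait before opening the dataset. The original helper is not shown in this section; as a rough, hypothetical sketch of what such a wait loop could do, using only the public distributed.Client API (the signature, timeout, and body are assumptions, not the original implementation):

import time

def cluster_wait(client, n_workers, timeout=600):
    """Hypothetical sketch: block until `n_workers` workers have registered."""
    start = time.time()
    # Poll the scheduler until the requested number of workers is up,
    # or give up after `timeout` seconds.
    while len(client.scheduler_info()["workers"]) < n_workers:
        if time.time() - start > timeout:
            raise TimeoutError("workers did not start in time")
        time.sleep(1)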
def test_namespace(pod_spec, loop, ns):
    with KubeCluster(pod_spec, loop=loop, namespace=ns) as cluster:
        assert 'dask' in cluster.name
        assert getpass.getuser() in cluster.name
        with KubeCluster(pod_spec, loop=loop, namespace=ns) as cluster2:
            assert cluster.name != cluster2.name

            cluster2.scale(1)
            [pod] = cluster2.pods()
def test_bad_args():
    with pytest.raises(TypeError) as info:
        KubeCluster("myfile.yaml")

    assert "KubeCluster.from_yaml" in str(info.value)

    with pytest.raises((ValueError, TypeError)) as info:
        KubeCluster({"kind": "Pod"})

    assert "KubeCluster.from_dict" in str(info.value)
async def test_namespace(k8s_cluster, pod_spec):
    async with KubeCluster(pod_spec, **cluster_kwargs) as cluster:
        assert "dask" in cluster.name
        assert getpass.getuser() in cluster.name
        async with KubeCluster(pod_spec, **cluster_kwargs) as cluster2:
            assert cluster.name != cluster2.name

            cluster2.scale(1)
            while len(await cluster2.pods()) != 1:
                await asyncio.sleep(0.1)
def test_bad_args(loop):
    with pytest.raises(TypeError) as info:
        KubeCluster('myfile.yaml')

    assert 'KubeCluster.from_yaml' in str(info.value)

    with pytest.raises((ValueError, TypeError)) as info:
        KubeCluster({'kind': 'Pod'})

    assert 'KubeCluster.from_dict' in str(info.value)
def cluster(self):
    if not self._cluster:
        try:
            from dask_kubernetes import KubeCluster
        except ImportError as e:
            print('missing dask_kubernetes, please run "pip install dask_kubernetes"')
            raise e
        self._cluster = KubeCluster(self.to_pod())
    return self._cluster
def setup(self, backend, z_chunksize, n_workers, run_num):
    self.cluster = KubeCluster(n_workers=n_workers)
    self.client = Client(self.cluster)
    bmt.cluster_wait(self.client, n_workers)
    self.target = target_zarr.ZarrStore(backend=backend, dask=True)
    # Open netCDF DS
    self.ds_netcdf = xr.open_mfdataset(DS_FILES, decode_cf=False,
                                       autoclose=True,
                                       chunks={'Z': z_chunksize})
    self.ds_netcdf_theta = self.ds_netcdf.Theta
def setup(self, backend, z_chunksize, n_workers, run_num):
    self.cluster = KubeCluster(n_workers=n_workers)
    self.client = Client(self.cluster)
    bmt.cluster_wait(self.client, n_workers)
    self.chunks = (3000, 3000, z_chunksize)
    self.da = da.random.normal(10, 0.1, size=DS_DIM, chunks=self.chunks)
    self.target = target_zarr.ZarrStore(backend=backend, dask=True,
                                        chunksize=self.chunks,
                                        shape=self.da.shape,
                                        dtype=self.da.dtype)
    self.target.get_temp_filepath()
def make_kube(pod_spec, **kws):
    """Create a dask_kubernetes.KubeCluster.

    pod_spec is either the name of a YAML file containing the worker pod
    specification or a dict containing the specification directly.
    kws is passed to KubeCluster.from_yaml or .from_dict.
    """
    from dask_kubernetes import KubeCluster
    if isinstance(pod_spec, str):
        return KubeCluster.from_yaml(pod_spec, **kws)
    else:
        return KubeCluster.from_dict(pod_spec, **kws)
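A possible way to call the helper above; the worker-spec.yml file name and the adaptive bounds are illustrative assumptions, not part of the original code:

from dask.distributed import Client

# Hypothetical usage; "worker-spec.yml" is an assumed file name.
cluster = make_kube("worker-spec.yml")   # delegates to KubeCluster.from_yaml
cluster.adapt(minimum=1, maximum=10)     # scale worker count with the load
client = Client(cluster)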
def cluster(self, scale=0):
    if not self._cluster:
        try:
            from dask_kubernetes import KubeCluster
            from dask.distributed import Client
        except ImportError as e:
            print('missing dask_kubernetes, please run "pip install dask_kubernetes"')
            raise e
        self._cluster = KubeCluster(self.to_pod())
        if not scale:
            self._cluster.adapt()
        else:
            self._cluster.scale(scale)
        Client(self._cluster)
    return self._cluster
def test_pod_template_minimal_dict(docker_image, loop):
    spec = {
        "spec": {
            "containers": [
                {
                    "args": [
                        "dask-worker",
                        "$(DASK_SCHEDULER_ADDRESS)",
                        "--nthreads",
                        "1",
                        "--death-timeout",
                        "60",
                    ],
                    "command": None,
                    "image": docker_image,
                    "imagePullPolicy": "IfNotPresent",
                    "name": "worker",
                }
            ]
        }
    }

    with KubeCluster(spec, loop=loop) as cluster:
        cluster.adapt()
        with Client(cluster, loop=loop) as client:
            future = client.submit(lambda x: x + 1, 10)
            result = future.result()
            assert result == 11
def run_flow(self) -> None:
    """
    Run the flow from specified flow_file_path location using a Dask executor
    """
    from prefect.engine import get_default_flow_runner_class
    from prefect.engine.executors import DaskExecutor
    from dask_kubernetes import KubeCluster

    with open(path.join(path.dirname(__file__), "worker_pod.yaml")) as pod_file:
        worker_pod = yaml.safe_load(pod_file)
        worker_pod = self._populate_worker_pod_yaml(yaml_obj=worker_pod)

        cluster = KubeCluster.from_dict(worker_pod)
        cluster.adapt(minimum=1, maximum=1)

        # Load serialized flow from file and run it with a DaskExecutor
        with open(
            prefect.context.get("flow_file_path", "/root/.prefect/flow_env.prefect"),
            "rb",
        ) as f:
            flow = cloudpickle.load(f)

            executor = DaskExecutor(address=cluster.scheduler_address)
            runner_cls = get_default_flow_runner_class()
            runner_cls(flow=flow).run(executor=executor)
class DaskDelegateConfig(DelegateConfig):
    redis_port = 6379
    redis_address = os.environ.get("REDIS_ADDRESS")
    redis_db = 0

    redis_cache_ttl = 60 * 60
    redis_vault_dir = "vault"

    dask_cluster_port = 8786
    dask_cluster_address = "localhost"
    dask_use_remote_cluster = False

    dask_worker_count = 1
    dask_worker_threads = 2
    dask_worker_memory_limit = "4GB"

    dask_dashboard_port = 8788
    dask_dashboard_address = "localhost"
    dask_dashboard_enabled = False

    kube_dask_worker_spec = os.environ.get("WORKER_SPEC_PATH")
    kube_cluster = None

    cache_provider: type[CacheProvider] = SimpleDiskCache(SimpleDiskCacheConfig())

    if kube_dask_worker_spec is not None:
        kube_cluster = KubeCluster(pod_template=kube_dask_worker_spec, n_workers=1)
def test_extra_container_config_merge(image_name, loop):
    """
    Test that our container config merging process works recursively fine
    """
    with KubeCluster(
        make_pod_spec(
            image_name,
            extra_container_config={
                "env": [{"name": "BOO", "value": "FOO"}],
                "args": ["last-item"],
            },
        ),
        loop=loop,
        n_workers=0,
        env={"TEST": "HI"},
    ) as cluster:
        pod = cluster.pod_template

        assert pod.spec.containers[0].env == [
            {"name": "TEST", "value": "HI"},
            {"name": "BOO", "value": "FOO"},
        ]

        assert pod.spec.containers[0].args[-1] == "last-item"
async def test_escape_name(pod_spec, auth, ns):
    async with KubeCluster(
        pod_spec, namespace=ns, name="foo@bar", auth=auth, **cluster_kwargs
    ) as cluster:
        assert "@" not in str(cluster.pod_template)
async def remote_cluster(pod_spec, ns, auth):
    async with KubeCluster(
        pod_spec, namespace=ns, deploy_mode="remote", auth=auth, **cluster_kwargs
    ) as cluster:
        yield cluster
def test_escape_username(pod_spec, loop, ns, monkeypatch):
    monkeypatch.setenv('LOGNAME', 'foo!')

    with KubeCluster(pod_spec, loop=loop, namespace=ns) as cluster:
        assert 'foo' in cluster.name
        assert '!' not in cluster.name
        assert 'foo' in cluster.pod_template.metadata.labels['user']
def test_automatic_startup(image_name, loop, ns):
    test_yaml = {
        "kind": "Pod",
        "metadata": {
            "labels": {
                "foo": "bar",
            }
        },
        "spec": {
            "containers": [{
                "args": [
                    "dask-worker",
                    "$(DASK_SCHEDULER_ADDRESS)",
                    "--nthreads", "1"
                ],
                "image": image_name,
                "name": "dask-worker"
            }]
        }
    }

    with tmpfile(extension='yaml') as fn:
        with open(fn, mode='w') as f:
            yaml.dump(test_yaml, f)
        with dask.config.set({'kubernetes.worker-template-path': fn}):
            with KubeCluster(loop=loop, namespace=ns) as cluster:
                assert cluster.pod_template.metadata.labels['foo'] == 'bar'
def test_pod_from_yaml_expand_env_vars(image_name, loop, ns):
    try:
        os.environ["FOO_IMAGE"] = image_name

        test_yaml = {
            "kind": "Pod",
            "metadata": {
                "labels": {
                    "app": "dask",
                    "component": "dask-worker"
                }
            },
            "spec": {
                "containers": [{
                    "args": [
                        "dask-worker",
                        "$(DASK_SCHEDULER_ADDRESS)",
                        "--nthreads", "1"
                    ],
                    "image": '${FOO_IMAGE}',
                    'imagePullPolicy': 'IfNotPresent',
                    "name": "dask-worker"
                }]
            }
        }

        with tmpfile(extension='yaml') as fn:
            with open(fn, mode='w') as f:
                yaml.dump(test_yaml, f)
            with KubeCluster.from_yaml(f.name, loop=loop, namespace=ns) as cluster:
                assert cluster.pod_template.spec.containers[0].image == image_name
    finally:
        del os.environ['FOO_IMAGE']
async def test_automatic_startup(image_name, ns, auth):
    test_yaml = {
        "kind": "Pod",
        "metadata": {"labels": {"foo": "bar"}},
        "spec": {
            "containers": [
                {
                    "args": [
                        "dask-worker",
                        "$(DASK_SCHEDULER_ADDRESS)",
                        "--nthreads",
                        "1",
                    ],
                    "image": image_name,
                    "name": "dask-worker",
                }
            ]
        },
    }
    with tmpfile(extension="yaml") as fn:
        with open(fn, mode="w") as f:
            yaml.dump(test_yaml, f)
        with dask.config.set({"kubernetes.worker-template-path": fn}):
            async with KubeCluster(
                namespace=ns, auth=auth, **cluster_kwargs
            ) as cluster:
                assert cluster.pod_template.metadata.labels["foo"] == "bar"
async def test_pod_from_yaml_expand_env_vars(image_name, ns, auth):
    try:
        os.environ["FOO_IMAGE"] = image_name

        test_yaml = {
            "kind": "Pod",
            "metadata": {"labels": {"app": "dask", "component": "dask-worker"}},
            "spec": {
                "containers": [
                    {
                        "args": [
                            "dask-worker",
                            "$(DASK_SCHEDULER_ADDRESS)",
                            "--nthreads",
                            "1",
                        ],
                        "image": "${FOO_IMAGE}",
                        "imagePullPolicy": "IfNotPresent",
                        "name": "dask-worker",
                    }
                ]
            },
        }

        with tmpfile(extension="yaml") as fn:
            with open(fn, mode="w") as f:
                yaml.dump(test_yaml, f)
            async with KubeCluster.from_yaml(
                f.name, namespace=ns, auth=auth, **cluster_kwargs
            ) as cluster:
                assert cluster.pod_template.spec.containers[0].image == image_name
    finally:
        del os.environ["FOO_IMAGE"]
def test_default_toleration_preserved(image_name):
    pod_spec = make_pod_spec(
        image=image_name,
        extra_pod_config={
            "tolerations": [
                {
                    "key": "example.org/toleration",
                    "operator": "Exists",
                    "effect": "NoSchedule",
                }
            ]
        },
    )
    cluster = KubeCluster(pod_spec)
    tolerations = cluster.pod_template.to_dict()["spec"]["tolerations"]
    assert {
        "key": "k8s.dask.org/dedicated",
        "operator": "Equal",
        "value": "worker",
        "effect": "NoSchedule",
        "toleration_seconds": None,
    } in tolerations
    assert {
        "key": "k8s.dask.org_dedicated",
        "operator": "Equal",
        "value": "worker",
        "effect": "NoSchedule",
        "toleration_seconds": None,
    } in tolerations
    assert {
        "key": "example.org/toleration",
        "operator": "Exists",
        "effect": "NoSchedule",
    } in tolerations
def test_escape_username(pod_spec, monkeypatch):
    monkeypatch.setenv("LOGNAME", "Foo!")

    with KubeCluster(pod_spec) as cluster:
        assert "foo" in cluster.name
        assert "!" not in cluster.name
        assert "foo" in cluster.pod_template.metadata.labels["user"]
def run_flow(self) -> None:
    """
    Run the flow from specified flow_file_path location using a Dask executor
    """
    try:
        from prefect.engine import get_default_flow_runner_class
        from prefect.engine.executors import DaskExecutor
        from dask_kubernetes import KubeCluster

        with open(path.join(path.dirname(__file__), "worker_pod.yaml")) as pod_file:
            worker_pod = yaml.safe_load(pod_file)
            worker_pod = self._populate_worker_pod_yaml(yaml_obj=worker_pod)

            cluster = KubeCluster.from_dict(
                worker_pod, namespace=prefect.context.get("namespace")
            )
            cluster.adapt(minimum=1, maximum=1)

            # Load serialized flow from file and run it with a DaskExecutor
            with open(
                prefect.context.get(
                    "flow_file_path", "/root/.prefect/flow_env.prefect"
                ),
                "rb",
            ) as f:
                flow = cloudpickle.load(f)

                executor = DaskExecutor(address=cluster.scheduler_address)
                runner_cls = get_default_flow_runner_class()
                runner_cls(flow=flow).run(executor=executor)

                sys.exit(0)  # attempt to force resource cleanup
    except Exception as exc:
        self.logger.error("Unexpected error raised during flow run: {}".format(exc))
        raise exc
def test_pod_from_dict(image_name, loop, ns):
    spec = {
        'metadata': {},
        'restartPolicy': 'Never',
        'spec': {
            'containers': [{
                'args': ['dask-worker', '$(DASK_SCHEDULER_ADDRESS)',
                         '--nthreads', '1',
                         '--death-timeout', '60'],
                'command': None,
                'image': image_name,
                'imagePullPolicy': 'IfNotPresent',
                'name': 'dask-worker',
            }]
        }
    }

    with KubeCluster.from_dict(spec, loop=loop, namespace=ns) as cluster:
        cluster.scale(2)
        with Client(cluster) as client:
            future = client.submit(lambda x: x + 1, 10)
            result = future.result()
            assert result == 11

            while len(cluster.scheduler.workers) < 2:
                sleep(0.1)

            # Ensure that inter-worker communication works well
            futures = client.map(lambda x: x + 1, range(10))
            total = client.submit(sum, futures)
            assert total.result() == sum(map(lambda x: x + 1, range(10)))
            assert all(client.has_what().values())
async def test_pod_from_minimal_dict(image_name, ns, auth):
    spec = {
        "spec": {
            "containers": [
                {
                    "args": [
                        "dask-worker",
                        "$(DASK_SCHEDULER_ADDRESS)",
                        "--nthreads",
                        "1",
                        "--death-timeout",
                        "60",
                    ],
                    "command": None,
                    "image": image_name,
                    "imagePullPolicy": "IfNotPresent",
                    "name": "worker",
                }
            ]
        }
    }

    async with KubeCluster.from_dict(
        spec, namespace=ns, auth=auth, **cluster_kwargs
    ) as cluster:
        cluster.adapt()
        async with Client(cluster, asynchronous=True) as client:
            future = client.submit(lambda x: x + 1, 10)
            result = await future
            assert result == 11
def test_pod_from_minimal_dict(image_name, loop, ns):
    spec = {
        'spec': {
            'containers': [{
                'args': [
                    'dask-worker',
                    '$(DASK_SCHEDULER_ADDRESS)',
                    '--nthreads', '1',
                    '--death-timeout', '60'
                ],
                'command': None,
                'image': image_name,
                'imagePullPolicy': 'IfNotPresent',
                'name': 'worker'
            }]
        }
    }

    with KubeCluster.from_dict(spec, loop=loop, namespace=ns) as cluster:
        cluster.adapt()
        with Client(cluster) as client:
            future = client.submit(lambda x: x + 1, 10)
            result = future.result()
            assert result == 11
async def test_start_with_workers(pod_spec, ns, auth):
    async with KubeCluster(
        pod_spec, n_workers=2, namespace=ns, auth=auth, **cluster_kwargs
    ) as cluster:
        async with Client(cluster, asynchronous=True) as client:
            while len(cluster.scheduler_info["workers"]) != 2:
                await asyncio.sleep(0.1)
def _get_kubernetes_cluster(worker_template_path=WORKER_TEMPLATE_PATH):
    from dask_kubernetes import KubeCluster

    cluster = KubeCluster.from_yaml(worker_template_path)
    return Client(cluster)
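For illustration, one way the helper above might be used; the template path and the dask.array computation are assumptions, not part of the original code:

import dask.array as da

# Hypothetical usage; "worker-template.yaml" is an assumed path.
client = _get_kubernetes_cluster("worker-template.yaml")
x = da.random.random((10000, 10000), chunks=(1000, 1000))
print(x.mean().compute())  # computation runs on the Kubernetes workers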