Example No. 1
class llc4320_benchmarks():
    """Zarr GCP tests on LLC4320 Datasets

    """
    timer = timeit.default_timer
    timeout = 3600
    repeat = 1
    number = 1
    warmup_time = 0.0
    run_nums = np.arange(1, RUNS + 1)
    params = (['GCS'], [1], [60, 80, 100, 120, 140, 160], run_nums)
    #params = (['GCS'], [1], [60], run_nums)
    #params = getTestConfigValue("gcp_kubernetes_read_zarr.llc4320_benchmarks")
    param_names = ['backend', 'z_chunksize', 'n_workers', 'run_num']

    @bmt.test_gcp
    def setup(self, backend, z_chunksize, n_workers, run_num):
        self.cluster = KubeCluster(n_workers=n_workers)
        self.client = Client(self.cluster)
        bmt.cluster_wait(self.client, n_workers)
        self.target = target_zarr.ZarrStore(backend=backend, dask=True)
        # Open Zarr DS
        self.ds_zarr = self.target.open_store(DS_STORE)
        self.ds_zarr_theta = self.ds_zarr.Theta

    @bmt.test_gcp
    def time_read(self, backend, z_chunksize, n_workers, run_num):
        self.ds_zarr_theta.max().load(retries=RETRIES)

    @bmt.test_gcp
    def teardown(self, backend, z_chunksize, n_workers, run_num):
        del self.ds_zarr_theta
        self.cluster.close()
Example No. 2
class llc4320_benchmarks():
    """netCDF GCP tests on LLC4320 Datasets

    """
    timer = timeit.default_timer
    timeout = 3600
    repeat = 1
    number = 1
    warmup_time = 0.0
    run_nums = np.arange(1, RUNS + 1)
    #params = (['FUSE'], [90], [60, 80, 100, 120, 140, 160], run_nums)
    params = (['FUSE'], [10], [100], run_nums)
    param_names = ['backend', 'z_chunksize', 'n_workers', 'run_num']

    @bmt.test_gcp
    def setup(self, backend, z_chunksize, n_workers, run_num):
        self.cluster = KubeCluster(n_workers=n_workers)
        self.client = Client(self.cluster)
        bmt.cluster_wait(self.client, n_workers)
        self.target = target_zarr.ZarrStore(backend=backend, dask=True)
        # Open netCDF DS
        self.ds_netcdf = xr.open_mfdataset(DS_FILES,
                                           decode_cf=False,
                                           autoclose=True,
                                           chunks={'Z': z_chunksize})
        self.ds_netcdf_theta = self.ds_netcdf.Theta

    @bmt.test_gcp
    def time_read(self, backend, z_chunksize, n_workers, run_num):
        self.ds_netcdf_theta.max().load(retries=RETRIES)

    @bmt.test_gcp
    def teardown(self, backend, z_chunksize, n_workers, run_num):
        del self.ds_netcdf_theta
        self.cluster.close()
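
These benchmark classes follow the airspeed velocity (asv) convention: asv takes the cartesian product of the params lists and passes each combination, in param_names order, to setup, the timed method, and teardown. Below is a minimal sketch of that expansion, using illustrative values rather than the real configuration.

# Sketch only: shows how asv-style params/param_names expand into the
# arguments received by setup()/time_read()/teardown(); values are illustrative.
import itertools

params = (['GCS'], [1], [60, 80], [1, 2])  # backend, z_chunksize, n_workers, run_num
param_names = ['backend', 'z_chunksize', 'n_workers', 'run_num']

for combo in itertools.product(*params):
    print(dict(zip(param_names, combo)))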
Example No. 3
    def setup(self, backend, z_chunksize, n_workers, run_num):
        self.cluster = KubeCluster(n_workers=n_workers)
        self.client = Client(self.cluster)
        bmt.cluster_wait(self.client, n_workers)
        self.target = target_zarr.ZarrStore(backend=backend, dask=True)
        # Open Zarr DS
        self.ds_zarr = self.target.open_store(DS_STORE)
        self.ds_zarr_theta = self.ds_zarr.Theta
Example No. 4
def test_namespace(pod_spec, loop, ns):
    with KubeCluster(pod_spec, loop=loop, namespace=ns) as cluster:
        assert 'dask' in cluster.name
        assert getpass.getuser() in cluster.name
        with KubeCluster(pod_spec, loop=loop, namespace=ns) as cluster2:
            assert cluster.name != cluster2.name

            cluster2.scale(1)
            [pod] = cluster2.pods()
Example No. 5
def test_bad_args():
    with pytest.raises(TypeError) as info:
        KubeCluster("myfile.yaml")

    assert "KubeCluster.from_yaml" in str(info.value)

    with pytest.raises((ValueError, TypeError)) as info:
        KubeCluster({"kind": "Pod"})

    assert "KubeCluster.from_dict" in str(info.value)
Example No. 6
async def test_namespace(k8s_cluster, pod_spec):
    async with KubeCluster(pod_spec, **cluster_kwargs) as cluster:
        assert "dask" in cluster.name
        assert getpass.getuser() in cluster.name
        async with KubeCluster(pod_spec, **cluster_kwargs) as cluster2:
            assert cluster.name != cluster2.name

            cluster2.scale(1)
            while len(await cluster2.pods()) != 1:
                await asyncio.sleep(0.1)
Example No. 7
def test_bad_args(loop):
    with pytest.raises(TypeError) as info:
        KubeCluster('myfile.yaml')

    assert 'KubeCluster.from_yaml' in str(info.value)

    with pytest.raises((ValueError, TypeError)) as info:
        KubeCluster({'kind': 'Pod'})

    assert 'KubeCluster.from_dict' in str(info.value)
Example No. 8
    def cluster(self):
        if not self._cluster:
            try:
                from dask_kubernetes import KubeCluster
            except ImportError as e:
                print(
                    'missing dask_kubernetes, please run "pip install dask_kubernetes"'
                )
                raise e
            self._cluster = KubeCluster(self.to_pod())
        return self._cluster
Example No. 9
    def setup(self, backend, z_chunksize, n_workers, run_num):
        self.cluster = KubeCluster(n_workers=n_workers)
        self.client = Client(self.cluster)
        bmt.cluster_wait(self.client, n_workers)
        self.target = target_zarr.ZarrStore(backend=backend, dask=True)
        # Open netCDF DS
        self.ds_netcdf = xr.open_mfdataset(DS_FILES,
                                           decode_cf=False,
                                           autoclose=True,
                                           chunks={'Z': z_chunksize})
        self.ds_netcdf_theta = self.ds_netcdf.Theta
Example No. 10
    def setup(self, backend, z_chunksize, n_workers, run_num):
        self.cluster = KubeCluster(n_workers=n_workers)
        self.client = Client(self.cluster)
        bmt.cluster_wait(self.client, n_workers)
        self.chunks = (3000, 3000, z_chunksize)
        self.da = da.random.normal(10, 0.1, size=DS_DIM, chunks=self.chunks)
        self.target = target_zarr.ZarrStore(backend=backend,
                                            dask=True,
                                            chunksize=self.chunks,
                                            shape=self.da.shape,
                                            dtype=self.da.dtype)
        self.target.get_temp_filepath()
Example No. 11
def make_kube(pod_spec, **kws):
    """Create a dask_kubernetes.KubeCluster.

    pod_spec is either the name of a YAML file containing the worker pod
    specification or a dict containing the specification directly.
    kws is passed to KubeCluster.from_yaml or .from_dict.
    """
    from dask_kubernetes import KubeCluster
    if isinstance(pod_spec, str):
        return KubeCluster.from_yaml(pod_spec, **kws)
    else:
        return KubeCluster.from_dict(pod_spec, **kws)
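
A hypothetical call site for make_kube; the YAML path is a placeholder and the extra keyword argument is simply forwarded through to the KubeCluster constructor.

from dask.distributed import Client

cluster = make_kube("worker-spec.yml", n_workers=4)  # placeholder pod spec file
client = Client(cluster)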
Example No. 12
    def cluster(self, scale=0):
        if not self._cluster:
            try:
                from dask_kubernetes import KubeCluster
                from dask.distributed import Client
            except ImportError as e:
                print('missing dask_kubernetes, please run "pip install dask_kubernetes"')
                raise e
            self._cluster = KubeCluster(self.to_pod())
            if not scale:
                self._cluster.adapt()
            else:
                self._cluster.scale(scale)
            Client(self._cluster)
        return self._cluster
Example No. 13
def test_pod_template_minimal_dict(docker_image, loop):
    spec = {
        "spec": {
            "containers": [
                {
                    "args": [
                        "dask-worker",
                        "$(DASK_SCHEDULER_ADDRESS)",
                        "--nthreads",
                        "1",
                        "--death-timeout",
                        "60",
                    ],
                    "command": None,
                    "image": docker_image,
                    "imagePullPolicy": "IfNotPresent",
                    "name": "worker",
                }
            ]
        }
    }

    with KubeCluster(spec, loop=loop) as cluster:
        cluster.adapt()
        with Client(cluster, loop=loop) as client:
            future = client.submit(lambda x: x + 1, 10)
            result = future.result()
            assert result == 11
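
The same minimal worker pod can typically be built with the make_pod_spec helper used in later examples, instead of writing the container dict by hand. A sketch, reusing the docker_image and loop fixtures from the test above, with illustrative resource limits:

from dask_kubernetes import KubeCluster, make_pod_spec

pod_spec = make_pod_spec(
    image=docker_image,        # same fixture as in the test above
    threads_per_worker=1,
    memory_limit="2G",         # illustrative resource limits
    cpu_limit="1",
)

with KubeCluster(pod_spec, loop=loop) as cluster:
    cluster.adapt()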
Example No. 14
    def run_flow(self) -> None:
        """
        Run the flow from specified flow_file_path location using a Dask executor
        """
        from prefect.engine import get_default_flow_runner_class
        from prefect.engine.executors import DaskExecutor
        from dask_kubernetes import KubeCluster

        with open(path.join(path.dirname(__file__),
                            "worker_pod.yaml")) as pod_file:
            worker_pod = yaml.safe_load(pod_file)
            worker_pod = self._populate_worker_pod_yaml(yaml_obj=worker_pod)

            cluster = KubeCluster.from_dict(worker_pod)
            cluster.adapt(minimum=1, maximum=1)

            # Load serialized flow from file and run it with a DaskExecutor
            with open(
                    prefect.context.get("flow_file_path",
                                        "/root/.prefect/flow_env.prefect"),
                    "rb",
            ) as f:
                flow = cloudpickle.load(f)

                executor = DaskExecutor(address=cluster.scheduler_address)
                runner_cls = get_default_flow_runner_class()
                runner_cls(flow=flow).run(executor=executor)
Example No. 15
class DaskDelegateConfig(DelegateConfig):
    redis_port = 6379
    redis_address = os.environ.get("REDIS_ADDRESS")
    redis_db = 0

    redis_cache_ttl = 60 * 60
    redis_vault_dir = "vault"

    dask_cluster_port = 8786
    dask_cluster_address = "localhost"
    dask_use_remote_cluster = False

    dask_worker_count = 1
    dask_worker_threads = 2
    dask_worker_memory_limit = "4GB"

    dask_dashboard_port = 8788
    dask_dashboard_address = "localhost"
    dask_dashboard_enabled = False

    kube_dask_worker_spec = os.environ.get("WORKER_SPEC_PATH")
    kube_cluster = None

    cache_provider: CacheProvider = SimpleDiskCache(SimpleDiskCacheConfig())

    if kube_dask_worker_spec is not None:
        kube_cluster = KubeCluster(pod_template=kube_dask_worker_spec, n_workers=1)
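
A hypothetical helper, not part of the original class, showing one way this configuration might be consumed: prefer the KubeCluster when a worker spec was provided, otherwise connect to the configured scheduler address.

from dask.distributed import Client

def make_client(config: DaskDelegateConfig) -> Client:
    # Placeholder helper; attribute names mirror the class above.
    if config.kube_cluster is not None:
        return Client(config.kube_cluster)
    return Client(f"{config.dask_cluster_address}:{config.dask_cluster_port}")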
Example No. 16
def test_extra_container_config_merge(image_name, loop):
    """
    Test that our container config merging process works recursively fine
    """
    with KubeCluster(
            make_pod_spec(
                image_name,
                extra_container_config={
                    "env": [{
                        "name": "BOO",
                        "value": "FOO"
                    }],
                    "args": ["last-item"],
                },
            ),
            loop=loop,
            n_workers=0,
            env={"TEST": "HI"},
    ) as cluster:

        pod = cluster.pod_template

        assert pod.spec.containers[0].env == [
            {
                "name": "TEST",
                "value": "HI"
            },
            {
                "name": "BOO",
                "value": "FOO"
            },
        ]

        assert pod.spec.containers[0].args[-1] == "last-item"
Example No. 17
async def test_escape_name(pod_spec, auth, ns):
    async with KubeCluster(pod_spec,
                           namespace=ns,
                           name="foo@bar",
                           auth=auth,
                           **cluster_kwargs) as cluster:
        assert "@" not in str(cluster.pod_template)
Example No. 18
async def remote_cluster(pod_spec, ns, auth):
    async with KubeCluster(pod_spec,
                           namespace=ns,
                           deploy_mode="remote",
                           auth=auth,
                           **cluster_kwargs) as cluster:
        yield cluster
Example No. 19
def test_escape_username(pod_spec, loop, ns, monkeypatch):
    monkeypatch.setenv('LOGNAME', 'foo!')

    with KubeCluster(pod_spec, loop=loop, namespace=ns) as cluster:
        assert 'foo' in cluster.name
        assert '!' not in cluster.name
        assert 'foo' in cluster.pod_template.metadata.labels['user']
Example No. 20
def test_automatic_startup(image_name, loop, ns):
    test_yaml = {
        "kind": "Pod",
        "metadata": {
            "labels": {
                "foo": "bar",
            }
        },
        "spec": {
            "containers": [{
                "args": [
                    "dask-worker", "$(DASK_SCHEDULER_ADDRESS)", "--nthreads",
                    "1"
                ],
                "image":
                image_name,
                "name":
                "dask-worker"
            }]
        }
    }

    with tmpfile(extension='yaml') as fn:
        with open(fn, mode='w') as f:
            yaml.dump(test_yaml, f)
        with dask.config.set({'kubernetes.worker-template-path': fn}):
            with KubeCluster(loop=loop, namespace=ns) as cluster:
                assert cluster.pod_template.metadata.labels['foo'] == 'bar'
Example No. 21
def test_pod_from_yaml_expand_env_vars(image_name, loop, ns):
    try:
        os.environ["FOO_IMAGE"] = image_name

        test_yaml = {
            "kind": "Pod",
            "metadata": {
                "labels": {
                    "app": "dask",
                    "component": "dask-worker"
                }
            },
            "spec": {
                "containers": [{
                    "args": [
                        "dask-worker",
                        "$(DASK_SCHEDULER_ADDRESS)",
                        "--nthreads",
                        "1"
                    ],
                    "image": '${FOO_IMAGE}',
                    'imagePullPolicy': 'IfNotPresent',
                    "name": "dask-worker"
                }]
            }
        }

        with tmpfile(extension='yaml') as fn:
            with open(fn, mode='w') as f:
                yaml.dump(test_yaml, f)
            with KubeCluster.from_yaml(f.name, loop=loop, namespace=ns) as cluster:
                assert cluster.pod_template.spec.containers[0].image == image_name
    finally:
        del os.environ['FOO_IMAGE']
Example No. 22
async def test_automatic_startup(image_name, ns, auth):
    test_yaml = {
        "kind": "Pod",
        "metadata": {"labels": {"foo": "bar"}},
        "spec": {
            "containers": [
                {
                    "args": [
                        "dask-worker",
                        "$(DASK_SCHEDULER_ADDRESS)",
                        "--nthreads",
                        "1",
                    ],
                    "image": image_name,
                    "name": "dask-worker",
                }
            ]
        },
    }

    with tmpfile(extension="yaml") as fn:
        with open(fn, mode="w") as f:
            yaml.dump(test_yaml, f)
        with dask.config.set({"kubernetes.worker-template-path": fn}):
            async with KubeCluster(
                namespace=ns, auth=auth, **cluster_kwargs
            ) as cluster:
                assert cluster.pod_template.metadata.labels["foo"] == "bar"
Example No. 23
async def test_pod_from_yaml_expand_env_vars(image_name, ns, auth):
    try:
        os.environ["FOO_IMAGE"] = image_name

        test_yaml = {
            "kind": "Pod",
            "metadata": {"labels": {"app": "dask", "component": "dask-worker"}},
            "spec": {
                "containers": [
                    {
                        "args": [
                            "dask-worker",
                            "$(DASK_SCHEDULER_ADDRESS)",
                            "--nthreads",
                            "1",
                        ],
                        "image": "${FOO_IMAGE}",
                        "imagePullPolicy": "IfNotPresent",
                        "name": "dask-worker",
                    }
                ]
            },
        }

        with tmpfile(extension="yaml") as fn:
            with open(fn, mode="w") as f:
                yaml.dump(test_yaml, f)
            async with KubeCluster.from_yaml(
                f.name, namespace=ns, auth=auth, **cluster_kwargs
            ) as cluster:
                assert cluster.pod_template.spec.containers[0].image == image_name
    finally:
        del os.environ["FOO_IMAGE"]
Example No. 24
def test_default_toleration_preserved(image_name):
    pod_spec = make_pod_spec(
        image=image_name,
        extra_pod_config={
            "tolerations": [{
                "key": "example.org/toleration",
                "operator": "Exists",
                "effect": "NoSchedule",
            }]
        },
    )
    cluster = KubeCluster(pod_spec)
    tolerations = cluster.pod_template.to_dict()["spec"]["tolerations"]
    assert {
        "key": "k8s.dask.org/dedicated",
        "operator": "Equal",
        "value": "worker",
        "effect": "NoSchedule",
        "toleration_seconds": None,
    } in tolerations
    assert {
        "key": "k8s.dask.org_dedicated",
        "operator": "Equal",
        "value": "worker",
        "effect": "NoSchedule",
        "toleration_seconds": None,
    } in tolerations
    assert {
        "key": "example.org/toleration",
        "operator": "Exists",
        "effect": "NoSchedule",
    } in tolerations
Example No. 25
def test_escape_username(pod_spec, monkeypatch):
    monkeypatch.setenv("LOGNAME", "Foo!")

    with KubeCluster(pod_spec) as cluster:
        assert "foo" in cluster.name
        assert "!" not in cluster.name
        assert "foo" in cluster.pod_template.metadata.labels["user"]
Example No. 26
    def run_flow(self) -> None:
        """
        Run the flow from specified flow_file_path location using a Dask executor
        """
        try:
            from prefect.engine import get_default_flow_runner_class
            from prefect.engine.executors import DaskExecutor
            from dask_kubernetes import KubeCluster

            with open(path.join(path.dirname(__file__),
                                "worker_pod.yaml")) as pod_file:
                worker_pod = yaml.safe_load(pod_file)
                worker_pod = self._populate_worker_pod_yaml(
                    yaml_obj=worker_pod)

                cluster = KubeCluster.from_dict(
                    worker_pod, namespace=prefect.context.get("namespace"))
                cluster.adapt(minimum=1, maximum=1)

                # Load serialized flow from file and run it with a DaskExecutor
                with open(
                        prefect.context.get("flow_file_path",
                                            "/root/.prefect/flow_env.prefect"),
                        "rb",
                ) as f:
                    flow = cloudpickle.load(f)

                    executor = DaskExecutor(address=cluster.scheduler_address)
                    runner_cls = get_default_flow_runner_class()
                    runner_cls(flow=flow).run(executor=executor)
                    sys.exit(0)  # attempt to force resource cleanup
        except Exception as exc:
            self.logger.error(
                "Unexpected error raised during flow run: {}".format(exc))
            raise exc
Example No. 27
def test_pod_from_dict(image_name, loop, ns):
    spec = {
        'metadata': {},
        'restartPolicy': 'Never',
        'spec': {
            'containers': [{
                'args': ['dask-worker', '$(DASK_SCHEDULER_ADDRESS)',
                         '--nthreads', '1',
                         '--death-timeout', '60'],
                'command': None,
                'image': image_name,
                'imagePullPolicy': 'IfNotPresent',
                'name': 'dask-worker',
            }]
        }
    }

    with KubeCluster.from_dict(spec, loop=loop, namespace=ns) as cluster:
        cluster.scale(2)
        with Client(cluster) as client:
            future = client.submit(lambda x: x + 1, 10)
            result = future.result()
            assert result == 11

            while len(cluster.scheduler.workers) < 2:
                sleep(0.1)

            # Ensure that inter-worker communication works well
            futures = client.map(lambda x: x + 1, range(10))
            total = client.submit(sum, futures)
            assert total.result() == sum(map(lambda x: x + 1, range(10)))
            assert all(client.has_what().values())
Example No. 28
async def test_pod_from_minimal_dict(image_name, ns, auth):
    spec = {
        "spec": {
            "containers": [
                {
                    "args": [
                        "dask-worker",
                        "$(DASK_SCHEDULER_ADDRESS)",
                        "--nthreads",
                        "1",
                        "--death-timeout",
                        "60",
                    ],
                    "command": None,
                    "image": image_name,
                    "imagePullPolicy": "IfNotPresent",
                    "name": "worker",
                }
            ]
        }
    }

    async with KubeCluster.from_dict(
        spec, namespace=ns, auth=auth, **cluster_kwargs
    ) as cluster:
        cluster.adapt()
        async with Client(cluster, asynchronous=True) as client:
            future = client.submit(lambda x: x + 1, 10)
            result = await future
            assert result == 11
Example No. 29
def test_pod_from_minimal_dict(image_name, loop, ns):
    spec = {
        'spec': {
            'containers': [{
                'args': [
                    'dask-worker', '$(DASK_SCHEDULER_ADDRESS)', '--nthreads',
                    '1', '--death-timeout', '60'
                ],
                'command': None,
                'image': image_name,
                'imagePullPolicy': 'IfNotPresent',
                'name': 'worker'
            }]
        }
    }

    with KubeCluster.from_dict(spec, loop=loop, namespace=ns) as cluster:
        cluster.adapt()
        with Client(cluster) as client:
            future = client.submit(lambda x: x + 1, 10)
            result = future.result()
            assert result == 11
Example No. 30
async def test_start_with_workers(pod_spec, ns, auth):
    async with KubeCluster(
        pod_spec, n_workers=2, namespace=ns, auth=auth, **cluster_kwargs
    ) as cluster:
        async with Client(cluster, asynchronous=True) as client:
            while len(cluster.scheduler_info["workers"]) != 2:
                await asyncio.sleep(0.1)
Example No. 31
    def _get_kubernetes_cluster(worker_template_path=WORKER_TEMPLATE_PATH):
        from dask_kubernetes import KubeCluster

        cluster = KubeCluster.from_yaml(worker_template_path)
        return Client(cluster)
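
A hypothetical call site for the helper above, assuming WORKER_TEMPLATE_PATH points at a valid worker pod YAML file:

client = _get_kubernetes_cluster()
future = client.submit(lambda x: x + 1, 10)
assert future.result() == 11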