Example #1
def launch_dask(n_gpus, min_gpus, k8s, adapt, worker_spec):
    if k8s:
        if worker_spec is None:
            worker_spec = default_worker_spec_fname
            print(f'Creating a default K8S worker spec at {worker_spec}')
            with open(worker_spec, "w") as yaml_file:
                yaml_file.write(default_worker_spec)

        cluster = KubeCluster.from_yaml(worker_spec)
        if adapt:
            cluster.adapt(minimum=min_gpus, maximum=n_gpus)
            print(
                f'Launching Adaptive K8S Dask cluster with [{min_gpus}, {n_gpus}] workers'
            )
        else:
            cluster.scale(n_gpus)
            print(f'Launching K8S Dask cluster with {n_gpus} workers')
        sleep(10)
    else:
        cluster = LocalCUDACluster(ip="", n_workers=n_gpus)
        print(f'Launching Local Dask cluster with {n_gpus} GPUs')

    client = Client(cluster)
    print(client)
    print(cluster)
    return client, cluster
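
A possible call of the helper above, with argument values that are illustrative only (not taken from the original project):

# Illustrative only: request an adaptive Kubernetes-backed cluster scaling
# between 2 and 8 GPU workers, letting launch_dask write the default worker spec.
client, cluster = launch_dask(
    n_gpus=8,
    min_gpus=2,
    k8s=True,
    adapt=True,
    worker_spec=None,
)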
Example #2
def test_pod_from_yaml_expand_env_vars(image_name, loop, ns):
    try:
        os.environ["FOO_IMAGE"] = image_name

        test_yaml = {
            "kind": "Pod",
            "metadata": {
                "labels": {
                    "app": "dask",
                    "component": "dask-worker"
                }
            },
            "spec": {
                "containers": [{
                    "args": [
                        "dask-worker",
                        "$(DASK_SCHEDULER_ADDRESS)",
                        "--nthreads",
                        "1"
                    ],
                    "image": '${FOO_IMAGE}',
                    'imagePullPolicy': 'IfNotPresent',
                    "name": "dask-worker"
                }]
            }
        }

        with tmpfile(extension='yaml') as fn:
            with open(fn, mode='w') as f:
                yaml.dump(test_yaml, f)
            with KubeCluster.from_yaml(f.name, loop=loop, namespace=ns) as cluster:
                assert cluster.pod_template.spec.containers[0].image == image_name
    finally:
        del os.environ['FOO_IMAGE']
Example #3
async def test_pod_from_yaml_expand_env_vars(image_name, ns, auth):
    try:
        os.environ["FOO_IMAGE"] = image_name

        test_yaml = {
            "kind": "Pod",
            "metadata": {"labels": {"app": "dask", "component": "dask-worker"}},
            "spec": {
                "containers": [
                    {
                        "args": [
                            "dask-worker",
                            "$(DASK_SCHEDULER_ADDRESS)",
                            "--nthreads",
                            "1",
                        ],
                        "image": "${FOO_IMAGE}",
                        "imagePullPolicy": "IfNotPresent",
                        "name": "dask-worker",
                    }
                ]
            },
        }

        with tmpfile(extension="yaml") as fn:
            with open(fn, mode="w") as f:
                yaml.dump(test_yaml, f)
            async with KubeCluster.from_yaml(
                f.name, namespace=ns, auth=auth, **cluster_kwargs
            ) as cluster:
                assert cluster.pod_template.spec.containers[0].image == image_name
    finally:
        del os.environ["FOO_IMAGE"]
Example #4
def make_kube(pod_spec, **kws):
    """Create a dask_kubernetes.KubeCluster.

    pod_spec is either the name of a YAML file containing the worker pod
    specification or a dict containing the specification directly.
    kws is passed to KubeCluster.from_yaml or .from_dict.
    """
    from dask_kubernetes import KubeCluster
    if isinstance(pod_spec, str):
        return KubeCluster.from_yaml(pod_spec, **kws)
    else:
        return KubeCluster.from_dict(pod_spec, **kws)
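
For illustration, a hypothetical call of make_kube with either form of pod_spec; the file name, image, and dict below are placeholders, not from the original source:

# Hypothetical usage of make_kube; the YAML path and pod dict are placeholders.
cluster = make_kube("worker-spec.yaml", namespace="dask")

pod = {
    "kind": "Pod",
    "spec": {
        "containers": [
            {
                "name": "dask-worker",
                "image": "daskdev/dask:latest",
                "args": ["dask-worker", "$(DASK_SCHEDULER_ADDRESS)"],
            }
        ]
    },
}
cluster = make_kube(pod, namespace="dask")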
Example #5
async def test_pod_from_yaml(image_name, ns, auth):
    test_yaml = {
        "kind": "Pod",
        "metadata": {
            "labels": {
                "app": "dask",
                "component": "dask-worker"
            }
        },
        "spec": {
            "containers": [{
                "args": [
                    "dask-worker",
                    "$(DASK_SCHEDULER_ADDRESS)",
                    "--nthreads",
                    "1",
                ],
                "image":
                image_name,
                "imagePullPolicy":
                "IfNotPresent",
                "name":
                "dask-worker",
            }]
        },
    }

    with tmpfile(extension="yaml") as fn:
        with open(fn, mode="w") as f:
            yaml.dump(test_yaml, f)
        async with KubeCluster.from_yaml(f.name,
                                         namespace=ns,
                                         auth=auth,
                                         **cluster_kwargs) as cluster:
            assert cluster.namespace == ns
            cluster.scale(2)
            await cluster
            async with Client(cluster, asynchronous=True) as client:
                future = client.submit(lambda x: x + 1, 10)
                result = await future.result(timeout=10)
                assert result == 11

                start = time()
                while len(cluster.scheduler_info["workers"]) < 2:
                    await asyncio.sleep(0.1)
                    assert time() < start + 20, "timeout"

                # Ensure that inter-worker communication works well
                futures = client.map(lambda x: x + 1, range(10))
                total = client.submit(sum, futures)
                assert (await total) == sum(map(lambda x: x + 1, range(10)))
                assert all((await client.has_what()).values())
Example #6
def test_pod_from_yaml(image_name, loop, ns):
    test_yaml = {
        "kind": "Pod",
        "metadata": {
            "labels": {
                "app": "dask",
                "component": "dask-worker"
            }
        },
        "spec": {
            "containers": [{
                "args": [
                    "dask-worker", "$(DASK_SCHEDULER_ADDRESS)", "--nthreads",
                    "1"
                ],
                "image":
                image_name,
                'imagePullPolicy':
                'IfNotPresent',
                "name":
                "dask-worker"
            }]
        }
    }

    with tmpfile(extension='yaml') as fn:
        with open(fn, mode='w') as f:
            yaml.dump(test_yaml, f)
        with KubeCluster.from_yaml(f.name, loop=loop, namespace=ns) as cluster:
            assert cluster.namespace == ns
            cluster.scale(2)
            with Client(cluster) as client:
                future = client.submit(lambda x: x + 1, 10)
                result = future.result(timeout=10)
                assert result == 11

                start = time()
                while len(cluster.scheduler.workers) < 2:
                    sleep(0.1)
                    assert time() < start + 10, 'timeout'

                # Ensure that inter-worker communication works well
                futures = client.map(lambda x: x + 1, range(10))
                total = client.submit(sum, futures)
                assert total.result() == sum(map(lambda x: x + 1, range(10)))
                assert all(client.has_what().values())
Example #7
#!/usr/bin/env python
import logging
import distributed
import dask.array as da
from dask_kubernetes import KubeCluster

logging.basicConfig(level=logging.INFO)

with KubeCluster.from_yaml('/usr/src/app/specs/worker-spec.yaml') as cluster:
    cluster.scale(4)
    # Connect dask to the cluster
    client = distributed.Client(cluster)

    # Create an array and calculate the mean
    array = da.ones((1000, 1000, 1000), chunks=(100, 100, 10))
    print(array.mean().compute())  # Should print 1.0
Example #8
#!/usr/bin/env python
# coding: utf-8

# In[14]:

from dask.distributed import Client

# In[15]:

from dask_kubernetes import KubeCluster

# In[16]:

cluster = KubeCluster.from_yaml("worker-spec.yml")
cluster.scale(1)

# In[13]:

cluster.close()

# In[ ]:
Example #9
import torch
import ase

from ase.db import connect

from al_mlp.offline_active_learner import OfflineActiveLearner
from al_mlp.base_calcs.morse import MultiMorse
from al_mlp.atomistic_methods import Relaxation

from amptorch.trainer import AtomsTrainer

from dask_kubernetes import KubeCluster
from dask.distributed import Client

cluster = KubeCluster.from_yaml(
    "/home/jovyan/al_mlp/examples/offline_al_dask_example/dask-worker-cpu-spec.yml"
)
client = Client(cluster)
cluster.adapt(minimum=0, maximum=4)

# Only necessary to upload the egg file to the
# workers if al_mlp is not installed in their environment.

files_list = ["al_mlp-0.1-py3.6.egg"]

# for i in range(len(files_list)):
#     fname = files_list[i]
#     with open(fname, "rb") as f:
#         data = f.read()

#     def _worker_upload(dask_worker, *, data, fname):
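
The upload step above is cut off. As a rough sketch only (not the original helper), the listed egg file could be sent to the workers with the standard dask.distributed Client.upload_file call:

# Rough sketch, assuming the files in files_list exist locally:
# Client.upload_file ships each file to every worker currently connected.
for fname in files_list:
    client.upload_file(fname)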
Example #10
#!/usr/bin/env python
# coding: utf-8

# In[2]:


from dask_kubernetes import KubeCluster


# In[ ]:


# Initial attempt at creation (also failed due to networking)
#tag::create_in_default[]
cluster = KubeCluster.from_yaml('worker-spec.yaml')
#end::create_in_default[]


# In[5]:


# This attempt also failed because it was run outside the cluster and could not communicate with the scheduler
#tag::create_in_namespace[]
cluster = KubeCluster.from_yaml('worker-spec.yaml', namespace='dask')
#end::create_in_namespace[]


# In[3]:


cluster.adapt(minimum=1, maximum=100)
Example #11
    def _get_kubernetes_cluster(worker_template_path=WORKER_TEMPLATE_PATH):
        from dask_kubernetes import KubeCluster

        cluster = KubeCluster.from_yaml(worker_template_path)
        return Client(cluster)
Example #12
                            cv=10,
                            return_train_score=False,
                            verbose=100)

##################################################################
from dask_kubernetes import KubeCluster
from dask.distributed import Client
import os
os.environ['SKLEARN_SITE_JOBLIB'] = "1"
from dask.distributed import Client
from sklearn import datasets, linear_model
from sklearn.model_selection import cross_validate
from pyitab.ext.sklearn._validation import cross_validate
import joblib

cluster = KubeCluster.from_yaml('pods.yml')

pods = cluster.scale(6)
client = Client(cluster.scheduler_address)

diabetes = datasets.load_diabetes()
from dask.array import from_array
X = from_array(diabetes.data, chunks='auto')
y = from_array(diabetes.target, chunks='auto')

model = linear_model.LinearRegression()

with joblib.parallel_backend('dask', scatter=[model, X, y]):
    cv_results = cross_validate(model,
                                X,
                                y,
Example #13
from dask_kubernetes import KubeCluster
import os
import time

cluster = KubeCluster.from_yaml('worker-spec.yaml',
                                port=8786,
                                diagnostics_port=8787)

# Get ENV variables for cluster scaling config
# More info: https://github.com/dask/distributed/blob/master/distributed/deploy/adaptive.py
min_workers_number = int(os.getenv('DASK_CLUSTER_MIN_WORKERS', 1))
max_workers_number = int(os.getenv('DASK_CLUSTER_MAX_WORKERS', 5))
startup_cost = os.getenv('DASK_CLUSTER_STARTUP_COST', '10s')
target_duration = os.getenv('DASK_CLUSTER_TARGET_DURATION', '10s')
wait_count = int(os.getenv('DASK_CLUSTER_WAIT_COUNT', 3))
check_interval = os.getenv('DASK_CLUSTER_CHECK_INTERVAL', '5s')

cluster.adapt(minimum=min_workers_number,
              maximum=max_workers_number,
              startup_cost=startup_cost,
              target_duration=target_duration,
              wait_count=wait_count,
              interval=check_interval)
while True:
    time.sleep(0.1)
Example #14
from dask_kubernetes import KubeCluster
import dask
import numpy as np

# In[ ]:

#tag::remote_lb_deploy[]

# In[2]:

# Specify a remote deployment using a load balancer, needed so the notebook outside the cluster can communicate with the scheduler
dask.config.set({"kubernetes.scheduler-service-type": "LoadBalancer"})

# In[4]:

cluster = KubeCluster.from_yaml('worker-spec.yaml',
                                namespace='dask',
                                deploy_mode='remote')

# In[ ]:

#end::remote_lb_deploy[]

# In[5]:

cluster.adapt(minimum=1, maximum=100)

# In[6]:

# Example usage
from dask.distributed import Client
import dask.array as da
Example #15
        sigma=0.2,
        ibrion=2,
        nsw=1000,
        #lorbit=11,
        potim=0.2,
        isif=0,
        #ediffg=-0.02,
        #ediff=1e-6,
        lcharg=False,
        lwave=False,
        lreal=False,
        ispin=2,
        isym=0)

    # Run between 0 and 4 1-core/1-gpu workers on the kube cluster
    cluster = KubeCluster.from_yaml('worker-cpu-spec.yml')
    client = Client(cluster)
    #cluster.adapt(minimum=0, maximum=10)
    cluster.scale(10)

    files_list = ['deap_ga.py', 'fillPool.py', 'mutations.py', 'utils.py']
    for i in range(len(files_list)):
        fname = files_list[i]
        with open(fname, 'rb') as f:
            data = f.read()

        def _worker_upload(dask_worker, *, data, fname):
            dask_worker.loop.add_callback(
                callback=dask_worker.upload_file,
                comm=None,  # not used
                filename=fname,
Example #16
from dask_kubernetes import KubeCluster
import dask.config
#import dask.distributed

dask.config.set({'kubernetes.name': 'myproject'})
cluster = KubeCluster.from_yaml('/worker-spec.yml')
cluster.scale_up(2)