def test_get_scale_up_kwargs(loop):
    with LocalCluster(0, scheduler_port=0, silence_logs=False,
                      diagnostics_port=None, loop=loop) as cluster:
        alc = Adaptive(cluster.scheduler, cluster, interval=100, scale_factor=3)

        assert alc.get_scale_up_kwargs() == {'n': 1}

        with Client(cluster, loop=loop) as c:
            future = c.submit(lambda x: x + 1, 1)
            assert future.result() == 2
            assert c.ncores()
            assert alc.get_scale_up_kwargs() == {'n': 3}

def test_min_max():
    loop = IOLoop.current()
    cluster = yield LocalCluster(0, scheduler_port=0, silence_logs=False,
                                 processes=False, diagnostics_port=None,
                                 loop=loop, asynchronous=True)
    yield cluster._start()
    try:
        adapt = Adaptive(cluster.scheduler, cluster, minimum=1, maximum=2,
                         interval=20)
        c = yield Client(cluster, asynchronous=True, loop=loop)

        start = time()
        while not cluster.scheduler.workers:
            yield gen.sleep(0.01)
            assert time() < start + 1

        yield gen.sleep(0.2)
        assert len(cluster.scheduler.workers) == 1
        assert frequencies(pluck(1, adapt.log)) == {'up': 1}

        futures = c.map(slowinc, range(100), delay=0.1)

        start = time()
        while len(cluster.scheduler.workers) < 2:
            yield gen.sleep(0.01)
            assert time() < start + 1
        assert len(cluster.scheduler.workers) == 2

        yield gen.sleep(0.5)
        assert len(cluster.scheduler.workers) == 2
        assert len(cluster.workers) == 2
        assert frequencies(pluck(1, adapt.log)) == {'up': 2}

        del futures

        start = time()
        while len(cluster.scheduler.workers) != 1:
            yield gen.sleep(0.01)
            assert time() < start + 1
        assert frequencies(pluck(1, adapt.log)) == {'up': 2, 'down': 1}
    finally:
        yield c._close()
        yield cluster._close()

def test_adaptive_local_cluster_multi_workers():
    loop = IOLoop.current()
    cluster = LocalCluster(0, scheduler_port=0, silence_logs=False,
                           processes=False, diagnostics_port=None,
                           loop=loop, start=False)
    try:
        cluster.scheduler.allowed_failures = 1000
        alc = Adaptive(cluster.scheduler, cluster, interval=100)
        c = yield Client(cluster, asynchronous=True, loop=loop)

        futures = c.map(slowinc, range(100), delay=0.01)

        start = time()
        while not cluster.scheduler.worker_info:
            yield gen.sleep(0.01)
            assert time() < start + 15

        yield c._gather(futures)
        del futures

        start = time()
        while cluster.workers:
            yield gen.sleep(0.01)
            assert time() < start + 5

        assert not cluster.workers
        assert not cluster.scheduler.workers
        yield gen.sleep(0.2)
        assert not cluster.workers
        assert not cluster.scheduler.workers

        futures = c.map(slowinc, range(100), delay=0.01)
        yield c._gather(futures)
    finally:
        yield c._close()
        yield cluster._close()

def test_adaptive_local_cluster_multi_workers():
    loop = IOLoop.current()
    cluster = LocalCluster(0, scheduler_port=0, silence_logs=False,
                           nanny=False, diagnostics_port=None,
                           loop=loop, start=False)
    alc = Adaptive(cluster.scheduler, cluster, interval=100)
    c = Client(cluster, start=False, loop=loop)
    yield c._start()

    for i in range(20):
        futures = c.map(slowinc, range(100), delay=0.01)
        yield c._gather(futures)
        del futures
        yield gen.sleep(0.1)

    yield c._shutdown()
    yield cluster._close()

def test_adaptive_local_cluster(loop):
    with LocalCluster(0, scheduler_port=0, silence_logs=False,
                      diagnostics_port=None, loop=loop) as cluster:
        alc = Adaptive(cluster.scheduler, cluster, interval=100)
        with Client(cluster, loop=loop) as c:
            assert not c.ncores()
            future = c.submit(lambda x: x + 1, 1)
            assert future.result() == 2
            assert c.ncores()

            sleep(0.1)
            assert c.ncores()  # still there after some time

            del future

            start = time()
            while cluster.scheduler.ncores:
                sleep(0.01)
                assert time() < start + 5

            assert not c.ncores()

def test_avoid_churn():
    """ We want to avoid creating and deleting workers frequently

    Instead we want to wait a few beats before removing a worker in case the
    user is taking a brief pause between work
    """
    cluster = yield LocalCluster(0, asynchronous=True, processes=False,
                                 scheduler_port=0, silence_logs=False,
                                 diagnostics_port=None)
    client = yield Client(cluster, asynchronous=True)
    try:
        adapt = Adaptive(cluster.scheduler, cluster, interval=20, wait_count=5)

        for i in range(10):
            yield client.submit(slowinc, i, delay=0.040)
            yield gen.sleep(0.040)

        assert frequencies(pluck(1, adapt.log)) == {'up': 1}
    finally:
        yield client._close()
        yield cluster._close()

def test_adaptive_local_cluster_multi_workers():
    loop = IOLoop.current()
    cluster = LocalCluster(0, scheduler_port=0, silence_logs=False,
                           nanny=False, diagnostics_port=None,
                           loop=loop, start=False)
    cluster.scheduler.allowed_failures = 1000
    alc = Adaptive(cluster.scheduler, cluster, interval=100)
    c = Client(cluster, start=False, loop=loop)
    yield c._start()

    futures = c.map(slowinc, range(100), delay=0.01)

    start = time()
    while not cluster.workers:
        yield gen.sleep(0.01)
        assert time() < start + 5

    yield c._gather(futures)
    del futures

    start = time()
    while cluster.workers:
        yield gen.sleep(0.01)
        assert time() < start + 5

    assert not cluster.workers
    yield gen.sleep(0.2)
    assert not cluster.workers

    futures = c.map(slowinc, range(100), delay=0.01)
    yield c._gather(futures)

    yield c._shutdown()
    yield cluster._close()

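# Note: the tests above drive Adaptive through the asynchronous/internal API
# (Client._start, Client._gather, cluster._close).  A minimal sketch of the
# equivalent blocking usage, assuming the same constructor arguments the tests
# exercise (kept as a comment so this module stays import-safe):
#
#     from distributed import Client, LocalCluster
#     from distributed.deploy import Adaptive
#
#     cluster = LocalCluster(0, scheduler_port=0, diagnostics_port=None)
#     adaptive = Adaptive(cluster.scheduler, cluster, interval=100)
#     client = Client(cluster)
#     # workers are added on demand and retired again when the work is done
#     assert client.submit(lambda x: x + 1, 1).result() == 2
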
def __init__(
        self,
        pod_template=None,
        name=None,
        namespace=None,
        n_workers=None,
        host=None,
        port=None,
        env=None,
        **kwargs
):
    name = name or dask.config.get('kubernetes.name')
    namespace = namespace or dask.config.get('kubernetes.namespace')
    n_workers = n_workers if n_workers is not None else dask.config.get('kubernetes.count.start')
    host = host or dask.config.get('kubernetes.host')
    port = port if port is not None else dask.config.get('kubernetes.port')
    env = env if env is not None else dask.config.get('kubernetes.env')

    if not pod_template and dask.config.get('kubernetes.worker-template', None):
        d = dask.config.get('kubernetes.worker-template')
        pod_template = make_pod_from_dict(d)

    if not pod_template and dask.config.get('kubernetes.worker-template-path', None):
        import yaml
        fn = dask.config.get('kubernetes.worker-template-path')
        fn = fn.format(**os.environ)
        with open(fn) as f:
            d = yaml.safe_load(f)
        pod_template = make_pod_from_dict(d)

    if not pod_template:
        msg = ("Worker pod specification not provided. See KubeCluster "
               "docstring for ways to specify workers")
        raise ValueError(msg)

    self.cluster = LocalCluster(ip=host or socket.gethostname(),
                                scheduler_port=port,
                                n_workers=0, **kwargs)
    try:
        kubernetes.config.load_incluster_config()
    except kubernetes.config.ConfigException:
        kubernetes.config.load_kube_config()

    self.core_api = kubernetes.client.CoreV1Api()

    if namespace is None:
        namespace = _namespace_default()

    name = name.format(user=getpass.getuser(),
                       uuid=str(uuid.uuid4())[:10],
                       **os.environ)
    name = escape(name)

    self.pod_template = clean_pod_template(pod_template)
    # Default labels that can't be overwritten
    self.pod_template.metadata.labels['dask.pydata.org/cluster-name'] = name
    self.pod_template.metadata.labels['user'] = escape(getpass.getuser())
    self.pod_template.metadata.labels['app'] = 'dask'
    self.pod_template.metadata.labels['component'] = 'dask-worker'
    self.pod_template.metadata.namespace = namespace

    self.pod_template.spec.containers[0].env.append(
        kubernetes.client.V1EnvVar(name='DASK_SCHEDULER_ADDRESS',
                                   value=self.scheduler_address)
    )
    if env:
        self.pod_template.spec.containers[0].env.extend([
            kubernetes.client.V1EnvVar(name=k, value=str(v))
            for k, v in env.items()
        ])
    self.pod_template.metadata.generate_name = name

    finalize(self, _cleanup_pods, self.namespace,
             self.pod_template.metadata.labels)

    if n_workers:
        self.scale(n_workers)

def __init__(
        self,
        pod_template=None,
        name=None,
        namespace=None,
        n_workers=None,
        host=None,
        port=None,
        env=None,
        auth=ClusterAuth.DEFAULT,
        **kwargs
):
    name = name or dask.config.get("kubernetes.name")
    namespace = namespace or dask.config.get("kubernetes.namespace")
    n_workers = (n_workers if n_workers is not None
                 else dask.config.get("kubernetes.count.start"))
    host = host or dask.config.get("kubernetes.host")
    port = port if port is not None else dask.config.get("kubernetes.port")
    env = env if env is not None else dask.config.get("kubernetes.env")

    if not pod_template and dask.config.get("kubernetes.worker-template", None):
        d = dask.config.get("kubernetes.worker-template")
        d = dask.config.expand_environment_variables(d)
        pod_template = make_pod_from_dict(d)

    if not pod_template and dask.config.get("kubernetes.worker-template-path", None):
        import yaml
        fn = dask.config.get("kubernetes.worker-template-path")
        fn = fn.format(**os.environ)
        with open(fn) as f:
            d = yaml.safe_load(f)
        d = dask.config.expand_environment_variables(d)
        pod_template = make_pod_from_dict(d)

    if not pod_template:
        msg = ("Worker pod specification not provided. See KubeCluster "
               "docstring for ways to specify workers")
        raise ValueError(msg)

    pod_template = clean_pod_template(pod_template)
    ClusterAuth.load_first(auth)

    self.core_api = kubernetes.client.CoreV1Api()

    if namespace is None:
        namespace = _namespace_default()

    name = name.format(user=getpass.getuser(),
                       uuid=str(uuid.uuid4())[:10],
                       **os.environ)
    name = escape(name)

    self.pod_template = pod_template

    # Default labels that can't be overwritten
    self.pod_template.metadata.labels["dask.org/cluster-name"] = name
    self.pod_template.metadata.labels["user"] = escape(getpass.getuser())
    self.pod_template.metadata.labels["app"] = "dask"
    self.pod_template.metadata.labels["component"] = "dask-worker"
    self.pod_template.metadata.namespace = namespace

    self.cluster = LocalCluster(host=host or socket.gethostname(),
                                scheduler_port=port,
                                n_workers=0, **kwargs)

    # TODO: handle any exceptions here, ensure self.cluster is properly
    # cleaned up.
    self.pod_template.spec.containers[0].env.append(
        kubernetes.client.V1EnvVar(name="DASK_SCHEDULER_ADDRESS",
                                   value=self.scheduler_address)
    )
    if env:
        self.pod_template.spec.containers[0].env.extend([
            kubernetes.client.V1EnvVar(name=k, value=str(v))
            for k, v in env.items()
        ])
    self.pod_template.metadata.generate_name = name

    finalize(self, _cleanup_pods, self.namespace,
             self.pod_template.metadata.labels)

    if n_workers:
        try:
            self.scale(n_workers)
        except Exception:
            self.cluster.close()
            raise
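
# A minimal usage sketch for the constructor above.  The image name and
# resource values are illustrative assumptions, not project defaults;
# make_pod_spec is one way to build the required pod template:
#
#     from dask_kubernetes import KubeCluster, make_pod_spec
#
#     pod_spec = make_pod_spec(image='daskdev/dask:latest',
#                              memory_limit='2G', memory_request='2G',
#                              cpu_limit=1, cpu_request=1)
#     cluster = KubeCluster(pod_spec, n_workers=2)
#     cluster.scale(4)  # request additional worker pods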