def test_pause_executor(c, s, a):
    memory = psutil.Process().memory_info().rss
    a.memory_limit = memory / 0.5 + 200e6
    np = pytest.importorskip("numpy")

    def f():
        x = np.ones(int(400e6), dtype="u1")
        sleep(1)

    with captured_logger(logging.getLogger("distributed.worker")) as logger:
        future = c.submit(f)
        futures = c.map(slowinc, range(30), delay=0.1)

        start = time()
        while not a.paused:
            yield gen.sleep(0.01)
            assert time() < start + 4, (
                format_bytes(psutil.Process().memory_info().rss),
                format_bytes(a.memory_limit),
                len(a.data),
            )
        out = logger.getvalue()
        assert "memory" in out.lower()
        assert "pausing" in out.lower()

    assert sum(f.status == "finished" for f in futures) < 4

    yield wait(futures)
def print_ds_info(ds, var):
    """Function for printing chunking information"""
    dt = ds[var].dtype
    itemsize = dt.itemsize
    chunk_size = ds[var].data.chunksize
    size = format_bytes(ds.nbytes)
    _bytes = reduce(mul, chunk_size) * itemsize
    chunk_size_bytes = format_bytes(_bytes)
    print(f'Variable name: {var}')
    print(f'Dataset dimensions: {ds[var].dims}')
    print(f'Chunk shape: {chunk_size}')
    print(f'Dataset shape: {ds[var].shape}')
    print(f'Chunk size: {chunk_size_bytes}')
    print(f'Dataset size: {size}')
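# Usage sketch for print_ds_info above (not from the original source): builds a
# small dask-backed xarray Dataset and prints its chunking details. The variable
# name "air", the array shape, and the chunk sizes are illustrative assumptions;
# format_bytes is assumed to come from dask.utils (the original module may import
# it from distributed.utils instead).
from functools import reduce
from operator import mul

import dask.array as da
import xarray as xr
from dask.utils import format_bytes

data = da.random.random((365, 180, 360), chunks=(90, 180, 360))
ds = xr.Dataset({"air": (("time", "lat", "lon"), data)})
print_ds_info(ds, "air")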
def main(args=None): args = parse_args(args) if args.protocol == 'ucx': sched_str = "ucx://" + args.server + ":" + args.port client = Client(sched_str) else: kwargs = {'n_workers': 2, 'threads_per_worker': 40} kwargs['processes'] = args.protocol == 'tcp' cluster = LocalCluster(**kwargs) client = Client(cluster) print(f"Connected to {client}") N = 1_000_000 P = 1_000 X = da.random.uniform(size=(N, P), chunks=(N // 100, P)) print(format_bytes(X.nbytes)) result = X.T.dot(X) start = clock() result.compute() stop = clock() print(result) print(f"\tTook {stop - start:0.2f}s") time.sleep(10)
def _widget_status(self):
    workers = len(self.scheduler.workers)
    cores = sum(ws.ncores for ws in self.scheduler.workers.values())
    memory = sum(ws.memory_limit for ws in self.scheduler.workers.values())
    memory = format_bytes(memory)
    text = """
<div>
  <style scoped>
    .dataframe tbody tr th:only-of-type {
        vertical-align: middle;
    }
    .dataframe tbody tr th {
        vertical-align: top;
    }
    .dataframe thead th {
        text-align: right;
    }
  </style>
  <table style="text-align: right;">
    <tr><th>Workers</th> <td>%d</td></tr>
    <tr><th>Cores</th> <td>%d</td></tr>
    <tr><th>Memory</th> <td>%s</td></tr>
  </table>
</div>
""" % (
        workers,
        cores,
        memory,
    )
    return text
def _widget_status(self):
    client = self._dask_client()
    workers = client.scheduler_info()['workers']
    n_workers = len(workers)
    cores = sum(w['nthreads'] for w in workers.values())
    memory = sum(w['memory_limit'] for w in workers.values())

    text = """
<div>
  <style scoped>
    .dataframe tbody tr th:only-of-type {
        vertical-align: middle;
    }
    .dataframe tbody tr th {
        vertical-align: top;
    }
    .dataframe thead th {
        text-align: right;
    }
  </style>
  <table style="text-align: right;">
    <tr><th>Workers</th> <td>%d</td></tr>
    <tr><th>Cores</th> <td>%d</td></tr>
    <tr><th>Memory</th> <td>%s</td></tr>
  </table>
</div>
""" % (n_workers, cores, format_bytes(memory))
    return text
def main(args=None):
    args = parse_args(args)

    if args.protocol == 'ucx':
        address = dask.config.get("distributed.comm.ucxaddress")
        if address is None:
            raise ValueError("Set distributed.comm.ucxaddress")
        client = Client(address)
    else:
        kwargs = {'n_workers': 2, 'threads_per_worker': 40}
        kwargs['processes'] = args.protocol == 'tcp'
        cluster = LocalCluster(**kwargs)
        client = Client(cluster)

    print(f"Connected to {client}")

    N = 1_000_000
    P = 1_000
    X = da.random.uniform(size=(N, P), chunks=(N // 100, P))
    print(format_bytes(X.nbytes))

    result = X.T.dot(X)

    start = clock()
    result.compute()
    stop = clock()

    print(result)
    print(f"\tTook {stop - start:0.2f}s")
    time.sleep(10)
def _widget_status(self):
    # report the proper number of nodes vs workers in a multi-GPU worker scenario
    nodes = len(self.scheduler_info["workers"])
    if self.use_gpu:
        nodes = int(nodes / self.n_gpus_per_node)
    if hasattr(self, "worker_spec"):
        requested = sum(
            1 if "group" not in each else len(each["group"])
            for each in self.worker_spec.values()
        )
    elif hasattr(self, "nodes"):
        requested = len(self.nodes)
    else:
        requested = nodes

    nodes = self._format_nodes(nodes, requested, self.use_gpu, self.n_gpus_per_node)

    cores = sum(v["nthreads"] for v in self.scheduler_info["workers"].values())
    cores_or_gpus = "Workers (GPUs)" if self.use_gpu else "Workers (vCPUs)"

    memory = (
        sum(
            v["gpu"]["memory-total"][0]
            for v in self.scheduler_info["workers"].values()
        )
        if self.use_gpu
        else sum(v["memory_limit"] for v in self.scheduler_info["workers"].values())
    )
    memory = format_bytes(memory)

    text = """
<div>
  <style scoped>
    .dataframe tbody tr th:only-of-type {
        vertical-align: middle;
    }
    .dataframe tbody tr th {
        vertical-align: top;
    }
    .dataframe thead th {
        text-align: right;
    }
  </style>
  <table style="text-align: right;">
    <tr> <th>Nodes</th> <td>%s</td></tr>
    <tr> <th>%s</th> <td>%s</td></tr>
    <tr> <th>Memory</th> <td>%s</td></tr>
  </table>
</div>
""" % (
        nodes,
        cores_or_gpus,
        cores,
        memory,
    )
    return text
def make_cluster_model(
    cluster_id: str,
    cluster_name: str,
    cluster: Cluster,
    adaptive: Union[Adaptive, None],
) -> ClusterModel:
    """
    Make a cluster model. This is a JSON-serializable representation
    of the information about a cluster that can be sent over the wire.

    Parameters
    ----------
    cluster_id: string
        A unique string for the cluster.

    cluster_name: string
        A display name for the cluster.

    cluster: Cluster
        The cluster out of which to make the cluster model.

    adaptive: Adaptive
        The adaptive controller for the number of workers for the cluster, or
        none if the cluster is not scaled adaptively.
    """
    # This would be a great target for a dataclass
    # once python 3.7 is in wider use.
    try:
        info = cluster.scheduler_info
    except AttributeError:
        info = cluster.scheduler.identity()
    try:
        cores = sum(d["nthreads"] for d in info["workers"].values())
    except KeyError:  # dask.__version__ < 2.0
        cores = sum(d["ncores"] for d in info["workers"].values())
    assert isinstance(info, dict)
    model = dict(
        id=cluster_id,
        name=cluster_name,
        scheduler_address=cluster.scheduler_address,
        dashboard_link=cluster.dashboard_link or "",
        workers=len(info["workers"]),
        memory=utils.format_bytes(
            sum(d["memory_limit"] for d in info["workers"].values())
        ),
        cores=cores,
    )
    if hasattr(cluster, "_supports_scaling"):
        model["supports_scaling"] = cluster._supports_scaling
    else:
        model["supports_scaling"] = True
    if adaptive:
        model["adapt"] = {"minimum": adaptive.minimum, "maximum": adaptive.maximum}
    return model
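# Usage sketch for make_cluster_model above (not from the original source): the
# cluster id and display name strings are illustrative, and a local cluster is
# assumed just to have something to introspect.
from dask.distributed import LocalCluster

cluster = LocalCluster(n_workers=2, threads_per_worker=1)
model = make_cluster_model("cluster-0", "example-cluster", cluster, adaptive=None)
print(model["workers"], model["cores"], model["memory"])  # e.g. 2 workers, 2 cores, total memory limit
cluster.close()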
def __repr__(self):
    running_workers = self._count_active_workers()
    running_cores = running_workers * self.worker_process_threads
    total_jobs = len(self.pending_jobs) + len(self.running_jobs)
    total_workers = total_jobs * self.worker_processes
    running_memory = running_workers * self.worker_memory / self.worker_processes

    return (self.__class__.__name__ +
            '(cores=%d, memory=%s, workers=%d/%d, jobs=%d/%d)' %
            (running_cores, format_bytes(running_memory), running_workers,
             total_workers, len(self.running_jobs), total_jobs))
def _widget_status(self):
    try:
        workers = self.scheduler_info["workers"]
    except KeyError:
        return None
    else:
        n_workers = len(workers)
        cores = sum(w["nthreads"] for w in workers.values())
        memory = sum(w["memory_limit"] for w in workers.values())

        return _widget_status_template % (n_workers, cores, format_bytes(memory))
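# _widget_status_template is defined elsewhere in the original module. A plausible
# sketch of it, inferred from the HTML table used by the other _widget_status
# variants in this collection, is shown below; treat the exact markup as an
# assumption rather than the library's actual definition.
_widget_status_template = """
<div>
  <table style="text-align: right;">
    <tr><th>Workers</th> <td>%d</td></tr>
    <tr><th>Cores</th> <td>%d</td></tr>
    <tr><th>Memory</th> <td>%s</td></tr>
  </table>
</div>
"""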
def test_pause_executor(c, s, a):
    memory = psutil.Process().memory_info().rss
    a.memory_limit = memory / 0.8 + 200e6
    np = pytest.importorskip('numpy')

    def f():
        x = np.ones(int(300e6), dtype='u1')
        sleep(1)

    with captured_logger(logging.getLogger('distributed.worker')) as logger:
        future = c.submit(f)
        futures = c.map(slowinc, range(10), delay=0.1)

        yield gen.sleep(0.3)
        assert a.paused, (format_bytes(psutil.Process().memory_info().rss),
                          format_bytes(a.memory_limit))
        out = logger.getvalue()
        assert 'memory' in out.lower()
        assert 'pausing' in out.lower()

    assert sum(f.status == 'finished' for f in futures) < 4

    yield wait(futures)
def _widget_status(self):
    if self._internal_client is None:
        return None
    try:
        workers = self._internal_client._scheduler_identity["workers"]
    except KeyError:
        if self._internal_client.status in ("closing", "closed"):
            return None
    else:
        n_workers = len(workers)
        cores = sum(w["nthreads"] for w in workers.values())
        memory = sum(w["memory_limit"] for w in workers.values())

        return _widget_status_template % (n_workers, cores, format_bytes(memory))
def main(args=None): args = parse_args(args) if args.protocol == 'ucx': sched_str = "ucx://"+ args.server + ":13337" client = Client(sched_str) elif args.protocol == 'tcp': sched_str = "tcp://"+ args.server + ":13337" client = Client(sched_str) else: kwargs = {'n_workers': 2, 'threads_per_worker': 40} kwargs['processes'] = args.protocol == 'tcp' cluster = LocalCluster(**kwargs) client = Client(cluster) print(f"Connected to {client}") N = int(args.length) P = int(args.length) RS = da.random.RandomState(RandomState=cupy.random.RandomState) #RS = da.random.RandomState(123) X = RS.normal(10, 1, size=(N, P)) #X = da.random.uniform(size=(N, P), chunks=(N/100, P/100)) X.persist() print(format_bytes(X.nbytes)) result = (X + X.T).sum() #(x + x.T).sum().compute() start = clock() result.compute() #with get_task_stream() as ts: # result.compute() stop = clock() #print(ts.data) print(result) print(format_bytes(X.nbytes)) print(f"\tTook {stop - start:0.2f}s") time.sleep(1)
def main(args=None):
    args = parse_args(args)
    client = Client(args.scheduler_address)  # noqa

    X = da.random.random(size=(100_000, 10_000), chunks=1_000)

    protocol = client.scheduler_info()['address'].split(":")[0]
    ctx = base.maybe_setup_profile(args.profile, 'bench-array-ops', protocol)

    x = X[:10].dot(X.T).sum(1)

    print("Array size:", format_bytes(X.nbytes))
    print("Client    :", client)
    print("Profile?  :", "yes" if args.profile else "no")
    print("-" * 80)

    with ctx:
        start = clock()
        dask.compute(x.sum(), x.mean(), x.std())
        stop = clock()

    print(f"\t Took {stop - start:0.2f}s")
def main(args=None):
    args = parse_args(args)
    client = Client(address=args.scheduler_address)

    protocol = client.scheduler_info()['address'].split(":")[0]
    ctx = base.maybe_setup_profile(args.profile, 'dot-product', protocol)

    print(f"Connected to {client}")

    N = 1_000_000
    P = 1_000
    X = da.random.uniform(size=(N, P), chunks=(N // 100, P))
    print(format_bytes(X.nbytes))

    result = X.T.dot(X)

    with ctx:
        start = clock()
        result.compute()
        stop = clock()

    print(f"\tTook {stop - start:0.2f}s")
async def connect(host, port, n_bytes, n_iter, recv, np, verbose, increment):
    ep = ucp.get_endpoint(host.encode(), port)

    arr = np.zeros(n_bytes, dtype='u1')

    start = clock()
    for i in range(n_iter):
        await ep.send_obj(arr)
        if recv == 'recv_into':
            await ep.recv_into(arr, arr.nbytes)
        else:
            # This is failing right now
            msg = await ep.recv_obj(arr.nbytes, cuda=np.__name__ == 'cupy')
            arr = np.asarray(msg.get_obj())
    stop = clock()

    expected = np.ones(n_bytes, dtype='u1')  # 0 or n_iter
    expected *= (int(increment) * n_iter)
    np.testing.assert_array_equal(arr, expected)

    took = stop - start

    # 2 for round-trip, n_iter for number of trips.
    print("Roundtrip benchmark")
    print("-------------------")
    print(f"n_iter   | {n_iter}")
    print(f"n_bytes  | {format_bytes(n_bytes)}")
    print(f"recv     | {recv}")
    print(f"object   | {np.__name__}")
    print(f"inc      | {increment}")
    print("\n===================")
    print(format_bytes(2 * n_iter * arr.nbytes / took), '/ s')
    print("===================")

    await ep.recv_future()
    await ep.send_obj(np.ones(1))
    ep.close()
def main():
    args = parse_args()

    q1 = mp.Queue()
    p1 = mp.Process(target=server, args=(q1, args))
    p1.start()
    port = q1.get()

    q2 = mp.Queue()
    p2 = mp.Process(target=client, args=(q2, port, args))
    p2.start()
    times = q2.get()

    p1.join()
    p2.join()

    assert not p1.exitcode
    assert not p2.exitcode
    assert len(times) == args.n_iter

    print("Roundtrip benchmark")
    print("--------------------------")
    print(f"n_iter      | {args.n_iter}")
    print(f"n_bytes     | {format_bytes(args.n_bytes)}")
    print(f"object      | {args.object_type}")
    print(f"reuse alloc | {args.reuse_alloc}")
    print("==========================")
    if args.object_type == "numpy":
        print("Device(s)   | Single CPU")
    else:
        print(f"Device(s)   | {args.server_dev}, {args.client_dev}")
    print(
        f"Average     | {format_bytes(2 * args.n_iter * args.n_bytes / sum(times))}/s"
    )
    print("--------------------------")
    print("Iterations")
    print("--------------------------")
    for i, t in enumerate(times):
        ts = format_bytes(2 * args.n_bytes / t)
        ts = (" " * (9 - len(ts))) + ts
        print("%03d         |%s/s" % (i, ts))
async def connect(host, port, n_bytes, n_iter, recv, np, verbose, increment): """ connect to server and write data """ ep = await ucp.create_endpoint(host, port) msg = np.zeros(n_bytes, dtype="u1") msg_size = numpy.array([msg.nbytes], dtype=np.uint64) start = clock() for i in range(n_iter): # send first message await ep.send(msg, msg_size) # send the real message resp = np.empty_like(msg) await ep.recv(resp, msg_size) # receive the echo stop = clock() expected = np.ones(n_bytes, dtype="u1") expected *= int(increment) * n_iter np.testing.assert_array_equal(msg, expected) took = stop - start # 2 for round-trip, n_iter for number of trips. print("Roundtrip benchmark") print("-------------------") print(f"n_iter | {n_iter}") print(f"n_bytes | {format_bytes(n_bytes)}") print(f"recv | {recv}") print(f"object | {np.__name__}") print(f"inc | {increment}") print("\n===================") print(format_bytes(2 * n_iter * msg.nbytes / took), "/ s") print("===================")
from dask.distributed import Client
import time

# client = Client(processes=False, threads_per_worker=4, n_workers=1, memory_limit='2GB')
client = Client('localhost:8786')
print(client)

import dask
from distributed.utils import format_bytes
import dask_ml.cluster
import dask_ml.datasets

X, y = dask_ml.datasets.make_blobs(
    n_samples=100000,
    n_features=50,
    centers=3,
    chunks=10000,
)
format_bytes(X.nbytes)

X = X.persist()

km = dask_ml.cluster.KMeans(n_clusters=3, init_max_iter=2,
                            oversampling_factor=10, random_state=0)
t = time.time()
km.fit(X)
print('Time kmeans distributed:', time.time() - t)
def __init__(self,
             name=None,
             cores=None,
             memory=None,
             processes=None,
             interface=None,
             death_timeout=None,
             local_directory=None,
             extra=None,
             env_extra=None,
             log_directory=None,
             walltime=None,
             threads=None,
             python=sys.executable,
             **kwargs):
    """ """
    # """
    # This initializer should be considered as Abstract, and never used
    # directly.
    # """
    if threads is not None:
        raise ValueError(threads_deprecation_message)

    if not self.scheduler_name:
        raise NotImplementedError(
            'JobQueueCluster is an abstract class that should not be instantiated.'
        )

    if name is None:
        name = dask.config.get('jobqueue.%s.name' % self.scheduler_name)
    if cores is None:
        cores = dask.config.get('jobqueue.%s.cores' % self.scheduler_name)
    if memory is None:
        memory = dask.config.get('jobqueue.%s.memory' % self.scheduler_name)
    if processes is None:
        processes = dask.config.get('jobqueue.%s.processes' % self.scheduler_name)
    if interface is None:
        interface = dask.config.get('jobqueue.%s.interface' % self.scheduler_name)
    if death_timeout is None:
        death_timeout = dask.config.get('jobqueue.%s.death-timeout' % self.scheduler_name)
    if local_directory is None:
        local_directory = dask.config.get('jobqueue.%s.local-directory' % self.scheduler_name)
    if extra is None:
        extra = dask.config.get('jobqueue.%s.extra' % self.scheduler_name)
    if env_extra is None:
        env_extra = dask.config.get('jobqueue.%s.env-extra' % self.scheduler_name)
    if log_directory is None:
        log_directory = dask.config.get('jobqueue.%s.log-directory' % self.scheduler_name)

    if dask.config.get('jobqueue.%s.threads' % self.scheduler_name, None):
        warnings.warn(threads_deprecation_message)

    if cores is None:
        raise ValueError(
            "You must specify how many cores to use per job like ``cores=8``"
        )
    if memory is None:
        raise ValueError(
            "You must specify how much memory to use per job like ``memory='24 GB'``"
        )

    # This attribute should be overridden
    self.job_header = None

    if interface:
        extra += ['--interface', interface]
        kwargs.setdefault('ip', get_ip_interface(interface))
    else:
        kwargs.setdefault('ip', '')

    # Bokeh diagnostics server should listen on all interfaces
    diagnostics_ip_and_port = ('', 8787)
    self.local_cluster = LocalCluster(
        n_workers=0, diagnostics_port=diagnostics_ip_and_port, **kwargs)

    # Keep information on process, cores, and memory, for use in subclasses
    self.worker_memory = parse_bytes(memory) if memory is not None else None
    self.worker_processes = processes
    self.worker_cores = cores
    self.name = name

    # plugin for tracking job status
    self._scheduler_plugin = JobQueuePlugin()
    self.local_cluster.scheduler.add_plugin(self._scheduler_plugin)

    self._adaptive = None

    self._env_header = '\n'.join(env_extra)

    # dask-worker command line build
    dask_worker_command = '%(python)s -m distributed.cli.dask_worker' % dict(python=python)

    command_args = [dask_worker_command, self.scheduler.address]
    command_args += ['--nthreads', self.worker_threads]
    if processes is not None and processes > 1:
        command_args += ['--nprocs', processes]

    mem = format_bytes(self.worker_memory / self.worker_processes)
    command_args += ['--memory-limit', mem.replace(' ', '')]
    command_args += ['--name', '%s--${JOB_ID}--' % name]

    if death_timeout is not None:
        command_args += ['--death-timeout', death_timeout]
    if local_directory is not None:
        command_args += ['--local-directory', local_directory]
    if extra is not None:
        command_args += extra

    self._command_template = ' '.join(map(str, command_args))

    self._target_scale = 0

    self.log_directory = log_directory
    if self.log_directory is not None:
        if not os.path.exists(self.log_directory):
            os.makedirs(self.log_directory)
def worker_process_memory(self):
    mem = format_bytes(self.worker_memory / self.worker_processes)
    mem = mem.replace(" ", "")
    return mem
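# Illustration of what worker_process_memory above returns (not from the original
# source; plain numbers stand in for the instance attributes, and format_bytes /
# parse_bytes are assumed to come from dask.utils). The space is stripped so the
# value can be passed as a single token to ``dask-worker --memory-limit``, as in
# the job-queue command templates elsewhere in this collection.
from dask.utils import format_bytes, parse_bytes

worker_memory = parse_bytes("24 GB")   # 24000000000 bytes
worker_processes = 4
mem = format_bytes(worker_memory / worker_processes).replace(" ", "")
print(mem)  # a compact string such as "6.00GB" (older dask) or "5.59GiB" (newer dask)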
    broadcast=True)
cl_bin_utils = scatter_dict(kappa0.cl_bin_utils, broadcast=True)
xi_bin_utils = scatter_dict(kappa0.xi_bin_utils, broadcast=True)

# `fix_cosmo` sets only one cosmology. If `False`, then it recalculates the
# power spectrum every time.
if fix_cosmo:
    kappa0.Ang_PS.angular_power_z()
else:
    kappa0.Ang_PS.reset()

print('kappa0 pk', kappa0.Ang_PS.PS.pk_func)

kappa0 = client.scatter(kappa0, broadcast=True)

proc = psutil.Process()
print('starting mcmc ', 'mem, peak mem: ',
      format_bytes(proc.memory_info().rss),
      int(getrusage(RUSAGE_SELF).ru_maxrss / 1024. / 1024.))


# define functions
def get_priors(params):  # assume flat priors for now
    x = np.logical_or(np.any(params > priors_max, axis=1),
                      np.any(params < priors_min, axis=1))
    p = np.zeros(len(params))
    p[x] = -np.inf
    return p


def assign_zparams(zbins={}, p_name='', p_value=None):
    pp = p_name.split('_')
    p_n = pp[0]
    bin_indx = np.int(pp[1])
    zbins[bin_indx][p_n] = p_value
def client(queue, port, server_address, args):
    if args.client_cpu_affinity >= 0:
        os.sched_setaffinity(0, [args.client_cpu_affinity])

    ucp.init()

    if args.object_type == "numpy":
        import numpy as np
    elif args.object_type == "cupy":
        import cupy as np

        np.cuda.runtime.setDevice(args.client_dev)
    else:
        import cupy as np
        import rmm

        rmm.reinitialize(
            pool_allocator=True,
            managed_memory=False,
            initial_pool_size=args.rmm_init_pool_size,
            devices=[args.client_dev],
        )
        np.cuda.runtime.setDevice(args.client_dev)
        np.cuda.set_allocator(rmm.rmm_cupy_allocator)

    async def run():
        ep = await ucp.create_endpoint(server_address, port)

        msg_send_list = []
        msg_recv_list = []
        if not args.reuse_alloc:
            for i in range(args.n_iter):
                msg_send_list.append(np.arange(args.n_bytes, dtype="u1"))
                msg_recv_list.append(np.zeros(args.n_bytes, dtype="u1"))
        else:
            t1 = np.arange(args.n_bytes, dtype="u1")
            t2 = np.zeros(args.n_bytes, dtype="u1")
            for i in range(args.n_iter):
                msg_send_list.append(t1)
                msg_recv_list.append(t2)
        assert msg_send_list[0].nbytes == args.n_bytes
        assert msg_recv_list[0].nbytes == args.n_bytes

        if args.cuda_profile:
            np.cuda.profiler.start()
        times = []
        for i in range(args.n_iter):
            start = clock()
            await ep.send(msg_send_list[i], args.n_bytes)
            await ep.recv(msg_recv_list[i], args.n_bytes)
            stop = clock()
            times.append(stop - start)
        if args.cuda_profile:
            np.cuda.profiler.stop()
        queue.put(times)

    loop = asyncio.get_event_loop()
    loop.run_until_complete(run())
    loop.close()

    times = queue.get()
    assert len(times) == args.n_iter

    print("Roundtrip benchmark")
    print("--------------------------")
    print(f"n_iter      | {args.n_iter}")
    print(f"n_bytes     | {format_bytes(args.n_bytes)}")
    print(f"object      | {args.object_type}")
    print(f"reuse alloc | {args.reuse_alloc}")
    print("==========================")
    if args.object_type == "numpy":
        print("Device(s)   | CPU-only")
        s_aff = (args.server_cpu_affinity
                 if args.server_cpu_affinity >= 0
                 else "affinity not set")
        c_aff = (args.client_cpu_affinity
                 if args.client_cpu_affinity >= 0
                 else "affinity not set")
        print(f"Server CPU  | {s_aff}")
        print(f"Client CPU  | {c_aff}")
    else:
        print(f"Device(s)   | {args.server_dev}, {args.client_dev}")
    print(
        f"Average     | {format_bytes(2 * args.n_iter * args.n_bytes / sum(times))}/s"
    )
    print("--------------------------")
    print("Iterations")
    print("--------------------------")
    for i, t in enumerate(times):
        ts = format_bytes(2 * args.n_bytes / t)
        ts = (" " * (9 - len(ts))) + ts
        print("%03d         |%s/s" % (i, ts))
def run(self):
    logger.warning('Reading configuration YAML config file')
    operation_choice = self.params['operation_choice']
    machine = self.params['machine']
    job_scheduler = self.params['job_scheduler']
    queue = self.params['queue']
    walltime = self.params['walltime']
    maxmemory_per_node = self.params['maxmemory_per_node']
    maxcore_per_node = self.params['maxcore_per_node']
    chunk_per_worker = self.params['chunk_per_worker']
    freq = self.params['freq']
    spil = self.params['spil']
    output_dir = self.params.get('output_dir', results_dir)
    now = datetime.datetime.now()
    output_dir = os.path.join(output_dir, f'{machine}/{str(now.date())}')
    os.makedirs(output_dir, exist_ok=True)
    parameters = self.params['parameters']
    num_workers = parameters['number_of_workers_per_nodes']
    num_threads = parameters.get('number_of_threads_per_workers', 1)
    num_nodes = parameters['number_of_nodes']
    chunking_schemes = parameters['chunking_scheme']
    io_formats = parameters['io_format']
    filesystems = parameters['filesystem']
    fixed_totalsize = parameters['fixed_totalsize']
    chsz = parameters['chunk_size']
    writefile_dir = parameters['writefile_dir']

    for wpn in num_workers:
        self.create_cluster(
            job_scheduler=job_scheduler,
            maxcore=maxcore_per_node,
            walltime=walltime,
            memory=maxmemory_per_node,
            queue=queue,
            wpn=wpn,
        )
        for num in num_nodes:
            self.client.cluster.scale(num * wpn)
            cluster_wait(self.client, num * wpn)
            timer = DiagnosticTimer()
            # dfs = []
            logger.warning(
                '#####################################################################\n'
                f'Dask cluster:\n'
                f'\t{self.client.cluster}\n')
            now = datetime.datetime.now()
            csv_filename = f"{output_dir}/compute_study_{now.strftime('%Y-%m-%d_%H-%M-%S')}.csv"
            for chunk_size in chsz:
                for io_format in io_formats:
                    for filesystem in filesystems:
                        if filesystem == 's3':
                            profile = parameters['profile']
                            bucket = parameters['bucket']
                            endpoint_url = parameters['endpoint_url']
                            fs = fsspec.filesystem(
                                's3',
                                profile=profile,
                                anon=False,
                                client_kwargs={'endpoint_url': endpoint_url},
                            )
                            root = f'{bucket}/test1'
                        elif filesystem == 'posix':
                            fs = LocalFileSystem()
                            root = writefile_dir
                            if not os.path.isdir(f'{root}'):
                                os.makedirs(f'{root}')
                        for chunking_scheme in chunking_schemes:
                            logger.warning(
                                f'Benchmark starting with: \n\tworker_per_node = {wpn},'
                                f'\n\tnum_nodes = {num}, \n\tchunk_size = {chunk_size},'
                                f'\n\tchunking_scheme = {chunking_scheme},'
                                f'\n\tchunk per worker = {chunk_per_worker}'
                                f'\n\tio_format = {io_format}'
                                f'\n\tfilesystem = {filesystem}')
                            ds, chunks = timeseries(
                                fixed_totalsize=fixed_totalsize,
                                chunk_per_worker=chunk_per_worker,
                                chunk_size=chunk_size,
                                chunking_scheme=chunking_scheme,
                                io_format=io_format,
                                num_nodes=num,
                                freq=freq,
                                worker_per_node=wpn,
                            )
                            # wait(ds)
                            dataset_size = format_bytes(ds.nbytes)
                            logger.warning(ds)
                            logger.warning(f'Dataset total size: {dataset_size}')
                            for op in self.operations[operation_choice]:
                                with timer.time(
                                    'runtime',
                                    operation=op.__name__,
                                    fixed_totalsize=fixed_totalsize,
                                    chunk_size=chunk_size,
                                    chunk_per_worker=chunk_per_worker,
                                    dataset_size=dataset_size,
                                    worker_per_node=wpn,
                                    threads_per_worker=num_threads,
                                    num_nodes=num,
                                    chunking_scheme=chunking_scheme,
                                    io_format=io_format,
                                    filesystem=filesystem,
                                    root=root,
                                    machine=machine,
                                    maxmemory_per_node=maxmemory_per_node,
                                    maxcore_per_node=maxcore_per_node,
                                    spil=spil,
                                ):
                                    fname = f'{chunk_size}{chunking_scheme}{filesystem}{num}'
                                    if op.__name__ == 'writefile':
                                        print(ds.sst.data.chunksize)
                                        filename = op(ds, fs, io_format, root, fname)
                                    elif op.__name__ == 'openfile':
                                        ds = op(fs, io_format, root, chunks, chunk_size)
                                    elif op.__name__ == 'deletefile':
                                        ds = op(fs, io_format, root, filename)
                                    else:
                                        op(ds)

                            # kills ds, and every other dependent computation
                            logger.warning('Computation done')
                            self.client.cancel(ds)
                            temp_df = timer.dataframe()
                            temp_df.to_csv(csv_filename, index=False)
                            # dfs.append(temp_df)

            # now = datetime.datetime.now()
            # filename = f"{output_dir}/compute_study_{now.strftime('%Y-%m-%d_%H-%M-%S')}.csv"
            # df = pd.concat(dfs)
            # df.to_csv(filename, index=False)
            logger.warning(f'Persisted benchmark result file: {csv_filename}')

        logger.warning(
            'Shutting down the client and cluster before changing number of workers per nodes'
        )
        self.client.cluster.close()
        logger.warning('Cluster shutdown finished')
        self.client.close()
        logger.warning('Client shutdown finished')

    logger.warning('=====> The End <=========')
def generate_maps(self):
    client = client_get(scheduler_info=self.scheduler_info)
    SJ = client.scatter(self, broadcast=True)
    step = self.nworkers * self.njobs_submit_per_worker  # min(nsim, len(client.scheduler_info()['workers']))
    i = 0
    j = 0
    futures = [delayed(get_clsim)(SJ, i) for i in np.arange(self.nsim)]
    futures_done = []
    while j < self.nsim:
        futures_j = client.compute(futures[j:j + step])
        wait_futures(futures_j)
        futures_done += futures_j
        j += step
    del futures

    if self.kappa_class.do_pseudo_cl:
        self.cl_b = {
            im: {'full': np.zeros(self.sim_clb_shape, dtype='float32')}
            for im in self.Master_algs
        }
        for im in self.Master_algs:
            self.cl_b[im].update({jks: {} for jks in self.jk_stat_keys})
        self.pcl_b = {'full': np.zeros(self.sim_clb_shape, dtype='float32')}
        self.pcl_b.update({jks: {} for jks in self.jk_stat_keys})

    if self.do_xi:
        self.xi_b = {'full': np.zeros(self.sim_xib_shape, dtype='float32')}
        # {im: np.zeros(sim_clb_shape, dtype='float32') for im in Master_algs}}
        self.xi_b.update({jks: {} for jks in self.jk_stat_keys})
        # {im: {} for im in Master_algs} for jks in jk_stat_keys})
        im = 'xi_imaster'
        self.cl_b[im] = {'full': np.zeros(self.sim_clb_shape, dtype='float32')}
        self.cl_b[im].update({jks: {} for jks in self.jk_stat_keys})

    for i in np.arange(self.nsim):
        tt = futures_done[i].result()
        if self.kappa_class.do_pseudo_cl:
            self.pcl_b[i] = tt[0]
            for k in self.Master_algs:
                self.cl_b[k][i] = tt[1][k]
        if self.do_xi:
            self.xi_b[i] = tt[2]
            k = 'xi_imaster'
            self.cl_b[k][i] = tt[1][k]
        client.cancel(futures_done[i])
        proc = psutil.Process()
        print('done map ', i, thread_count(), 'mem, peak mem: ',
              format_bytes(proc.memory_info().rss),
              int(getrusage(RUSAGE_SELF).ru_maxrss / 1024. / 1024.))

    # del futures_done
    print('done map ', i, thread_count(), 'mem, peak mem: ',
          format_bytes(proc.memory_info().rss),
          int(getrusage(RUSAGE_SELF).ru_maxrss / 1024. / 1024.))
    j += step
def __init__(self,
             name=None,
             cores=None,
             memory=None,
             processes=None,
             interface=None,
             death_timeout=None,
             local_directory=None,
             extra=None,
             env_extra=None,
             walltime=None,
             threads=None,
             **kwargs):
    """ """
    # """
    # This initializer should be considered as Abstract, and never used
    # directly.
    # """
    if threads is not None:
        raise ValueError(threads_deprecation_message)

    if not self.scheduler_name:
        raise NotImplementedError('JobQueueCluster is an abstract class '
                                  'that should not be instantiated.')

    if name is None:
        name = dask.config.get('jobqueue.%s.name' % self.scheduler_name)
    if cores is None:
        cores = dask.config.get('jobqueue.%s.cores' % self.scheduler_name)
    if memory is None:
        memory = dask.config.get('jobqueue.%s.memory' % self.scheduler_name)
    if processes is None:
        processes = dask.config.get('jobqueue.%s.processes' % self.scheduler_name)
    if interface is None:
        interface = dask.config.get('jobqueue.%s.interface' % self.scheduler_name)
    if death_timeout is None:
        death_timeout = dask.config.get('jobqueue.%s.death-timeout' % self.scheduler_name)
    if local_directory is None:
        local_directory = dask.config.get('jobqueue.%s.local-directory' % self.scheduler_name)
    if extra is None:
        extra = dask.config.get('jobqueue.%s.extra' % self.scheduler_name)
    if env_extra is None:
        env_extra = dask.config.get('jobqueue.%s.env-extra' % self.scheduler_name)

    if dask.config.get('jobqueue.%s.threads' % self.scheduler_name, None):
        warnings.warn(threads_deprecation_message)

    if cores is None:
        raise ValueError("You must specify how many cores to use per job "
                         "like ``cores=8``")
    if memory is None:
        raise ValueError("You must specify how much memory to use per job "
                         "like ``memory='24 GB'``")

    # This attribute should be overridden
    self.job_header = None

    if interface:
        host = get_ip_interface(interface)
        extra += ' --interface %s ' % interface
    else:
        host = socket.gethostname()

    self.local_cluster = LocalCluster(n_workers=0, ip=host, **kwargs)

    # Keep information on process, cores, and memory, for use in subclasses
    self.worker_memory = parse_bytes(memory)
    self.worker_processes = processes
    self.worker_cores = cores
    self.name = name

    self.jobs = dict()
    self.n = 0
    self._adaptive = None

    self._env_header = '\n'.join(env_extra)

    # dask-worker command line build
    dask_worker_command = (
        '%(python)s -m distributed.cli.dask_worker' % dict(python=sys.executable))
    self._command_template = ' '.join([dask_worker_command, self.scheduler.address])
    self._command_template += " --nthreads %d" % self.worker_threads
    if processes is not None and processes > 1:
        self._command_template += " --nprocs %d" % processes

    mem = format_bytes(self.worker_memory / self.worker_processes)
    mem = mem.replace(' ', '')
    self._command_template += " --memory-limit %s" % mem

    if name is not None:
        self._command_template += " --name %s" % name
        self._command_template += "-%(n)d"  # Keep %(n) to be replaced later

    if death_timeout is not None:
        self._command_template += " --death-timeout %s" % death_timeout
    if local_directory is not None:
        self._command_template += " --local-directory %s" % local_directory
    if extra is not None:
        self._command_template += extra