def optimize(dsk, keys, **kwargs):
    flatkeys = list(flatten(keys)) if isinstance(keys, list) else [keys]
    dsk, dependencies = cull(dsk, flatkeys)
    dsk, dependencies = fuse(dsk, keys, dependencies=dependencies,
                             ave_width=_globals.get('fuse_ave_width', 1))
    dsk, _ = cull(dsk, keys)
    return dsk
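
# A minimal sketch of what the optimize pass above does, using dask's cull/fuse
# helpers directly on a toy graph (import locations assume a reasonably recent
# dask; older releases exposed them from the dask.optimize module instead).
from operator import add

from dask.optimization import cull, fuse

toy_dsk = {'a': 1,
           'b': (add, 'a', 10),
           'c': (add, 'b', 100),
           'unused': (add, 'a', -1)}

# cull drops tasks the requested keys don't need ('unused' disappears)
culled, dependencies = cull(toy_dsk, ['c'])

# fuse collapses the linear chain a -> b -> c into a single task
fused, _ = fuse(culled, ['c'], dependencies=dependencies, ave_width=1)
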
def test_set_options_context_manager():
    with set_options(foo='bar'):
        assert _globals['foo'] == 'bar'
    assert _globals.get('foo', None) is None

    try:
        set_options(foo='baz')
        assert _globals['foo'] == 'baz'
    finally:
        del _globals['foo']
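
# Hedged, simplified sketch of the set_options/_globals pattern the test above
# exercises (illustrative only, not the actual implementation): a context
# manager that writes options into a module-level dict, takes effect
# immediately, and restores the previous contents on exit.
_globals = {}


class set_options(object):
    def __init__(self, **kwargs):
        self.old = _globals.copy()
        _globals.update(kwargs)   # applies immediately, even without `with`

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        _globals.clear()
        _globals.update(self.old)
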
def dask_pipeline(df, schema, canvas, glyph, summary):
    dsk, name = glyph_dispatch(glyph, df, schema, canvas, summary)

    # Resolve the scheduler and graph accessors in a way that works both
    # before and after the dask 0.18 collection-protocol changes
    if LooseVersion(dask.__version__) >= '0.18.0':
        get = dask.base.get_scheduler() or df.__dask_scheduler__
    else:
        get = (_globals.get('get') or
               getattr(df, '__dask_scheduler__', None) or
               df._default_get)

    keys = getattr(df, '__dask_keys__', None) or df._keys
    optimize = getattr(df, '__dask_optimize__', None) or df._optimize

    dsk.update(optimize(df.dask, keys()))

    return get(dsk, name)
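
# Hedged usage sketch (assumes dask >= 0.18, where the __dask_* collection
# protocol used above is available): resolve a scheduler the same way
# dask_pipeline does, then run a dataframe's graph by hand.
import dask
import dask.dataframe as dd
import pandas as pd

ddf = dd.from_pandas(pd.DataFrame({'x': range(10)}), npartitions=2)

# get_scheduler() returns the user-configured scheduler, or None if unset,
# in which case we fall back to the collection's default scheduler
scheduler = dask.base.get_scheduler() or ddf.__dask_scheduler__

graph = dict(ddf.__dask_graph__())
keys = ddf.__dask_keys__()
partitions = scheduler(graph, keys)   # one pandas DataFrame per partition
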
def maybe_compress(payload, min_size=1e4, sample_size=1e4, nsamples=5):
    """Maybe compress payload

    1.  We don't compress small messages
    2.  We sample the payload in a few spots, compress that, and if it doesn't
        do any good we return the original
    3.  We then compress the full original; if it doesn't compress well then we
        return the original
    4.  We return the compressed result
    """
    compression = _globals.get('compression', default_compression)

    if not compression:
        return None, payload
    if len(payload) < min_size:
        return None, payload
    if len(payload) > 2 ** 31:  # Too large, compression libraries often fail
        return None, payload

    min_size = int(min_size)
    sample_size = int(sample_size)

    compress = compressions[compression]['compress']

    # Compress a sample, return original if not very compressed
    sample = byte_sample(payload, sample_size, nsamples)
    if len(compress(sample)) > 0.9 * len(sample):  # sample not very compressible
        return None, payload

    if type(payload) is memoryview:
        nbytes = payload.itemsize * len(payload)
    else:
        nbytes = len(payload)

    if default_compression and blosc and type(payload) is memoryview:
        # Blosc does itemsize-aware shuffling, resulting in better compression
        compressed = blosc.compress(payload, typesize=payload.itemsize,
                                    cname='lz4', clevel=5)
        compression = 'blosc'
    else:
        compressed = compress(ensure_bytes(payload))

    if len(compressed) > 0.9 * nbytes:  # full data not very compressible
        return None, payload
    else:
        return compression, compressed
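
# byte_sample is referenced above but not shown; this is a hedged sketch of
# what such a helper might look like, together with the same 0.9
# compressibility heuristic applied to the sample (names here are
# illustrative, not the actual distributed implementation).
import random
import zlib


def byte_sample_sketch(b, size, n):
    """Return roughly `size` bytes taken from `b` in `n` random slices."""
    chunk = size // n
    starts = [random.randint(0, max(0, len(b) - chunk)) for _ in range(n)]
    return b''.join(bytes(b[start:start + chunk]) for start in starts)


payload = b'0123456789' * 10000           # highly repetitive, compresses well
sample = byte_sample_sketch(payload, size=10000, n=5)
worthwhile = len(zlib.compress(sample)) < 0.9 * len(sample)  # same heuristic as above
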