Exemplo n.º 1
0
def test_compute_no_opt():
    """Check that ``optimize_graph=False`` keeps Bag from fusing tasks.

    Bag applies ``fuse`` by default. A ``pretask`` callback records every key
    that is computed, and the recorded keys are compared between a run with
    the optimization switched off and a run with default settings.
    """
    from dask.callbacks import Callback

    def count_matching(seen, fragment):
        # How many recorded keys contain ``fragment`` in their first element.
        return sum(1 for key in seen if fragment in key[0])

    bag = db.from_sequence(range(100), npartitions=4)
    add1 = tz.partial(add, 1)
    mul2 = tz.partial(mul, 2)
    pipeline = bag.map(add1).map(mul2)

    # With the kwarg the optimization must not happen: one "add" and one
    # "mul" key is expected per partition.
    unfused = []
    with Callback(pretask=lambda key, *args: unfused.append(key)):
        pipeline.compute(scheduler="single-threaded", optimize_graph=False)
    assert count_matching(unfused, "mul") == 4
    assert count_matching(unfused, "add") == 4

    # Without the kwarg the optimization must happen.
    fused = []
    with Callback(pretask=lambda key, *args: fused.append(key)):
        pipeline.compute(scheduler="single-threaded")
    # Names of fused tasks have been merged, and the original key is an alias.
    # Otherwise, the counts below would be 4 and 0.
    assert count_matching(fused, "mul") == 8
    assert count_matching(fused, "add") == 4
    assert count_matching(fused, "add-mul") == 4  # the fused tasks were renamed
Exemplo n.º 2
0
def test_broken_callback():
    """Check that an exception raised by a ``start`` callback reaches the caller.

    Two callbacks are active at once: the outer one raises from ``start``,
    the inner one is harmless. Running the graph must surface the outer
    callback's ``ValueError``.
    """
    from dask.callbacks import Callback

    def _noop(*args, **kwargs):
        pass

    def _raise_value_error(*args, **kwargs):
        raise ValueError("my_exception")

    graph = {"x": 1}

    with Callback(start=_raise_value_error, finish=_noop):
        with Callback(start=_noop, finish=_noop):
            with pytest.raises(ValueError, match="my_exception"):
                get(graph, "x")
def test_add_remove_mutates_not_replaces():
    """Check that ``Callback.active`` is populated only inside the ``with`` block."""
    # Nothing may be registered before the context is entered.
    assert not Callback.active

    registered = Callback()
    with registered:
        # While the context is open, the active collection is non-empty.
        assert Callback.active

    # Leaving the context must empty the collection again.
    assert not Callback.active
Exemplo n.º 4
0
def test_optimize_graph_false():
    """Check that ``get(..., optimize_graph=False)`` reports two keys to ``pretask``."""
    from dask.callbacks import Callback

    graph = {'x': 1, 'y': (inc, 'x'), 'z': (add, 10, 'y')}
    executed = []

    def record(key, *_unused):
        # Only the key is of interest; the remaining callback arguments are ignored.
        executed.append(key)

    with Callback(pretask=record):
        get(graph, 'z', optimize_graph=False)

    # Presumably the two recorded keys are 'y' and 'z' -- only the count is checked.
    assert len(executed) == 2
    def multi_process_containment_tests(self):
        """
        Run the containment check over all points with Dask's process scheduler.

        The longitude/latitude columns of ``self.data`` are turned into point
        geometries, split into 100 partitions, and ``self.containment_checker``
        is applied to each partition. ``self.ProgCallback`` is registered as a
        ``posttask`` callback while the computation runs.

        Returns
        -------
        is_urban : pd.Series
            Boolean series; per the original description it is meant to be
            appended as a new column to the original CSV.

        """
        # Build one point object per event location.
        coordinates = self.data.loc[:, [self.long_name, self.lat_name]]
        geometry = gpd.points_from_xy(
            self.data.loc[:, self.long_name],
            self.data.loc[:, self.lat_name],
        )
        points = gpd.GeoDataFrame(coordinates, geometry=geometry)

        # The lambda below closes over this local rather than over ``self``
        # (presumably so the instance is not shipped to worker processes).
        containment_checker = self.containment_checker

        with Callback(posttask=self.ProgCallback):
            partitioned = dd.from_pandas(points.geometry, npartitions=100)
            flags = partitioned.map_partitions(
                lambda dframe: pd.Series(
                    np.any(dframe.apply(containment_checker), axis=1)),
                meta=pd.Series(dtype=bool))
            is_urban = flags.compute(scheduler='processes')
        return is_urban
Exemplo n.º 6
0
def test_optimize_graph_false():
    """Run a three-key graph unoptimized and check how many keys ``pretask`` saw."""
    from dask.callbacks import Callback

    seen_keys = []

    def remember(key, *_rest):
        # The callback's other positional arguments are not needed here.
        seen_keys.append(key)

    dsk = {"x": 1, "y": (inc, "x"), "z": (add, 10, "y")}
    with Callback(pretask=remember):
        get(dsk, "z", optimize_graph=False)

    # Exactly two keys must have been reported to the callback.
    assert len(seen_keys) == 2
Exemplo n.º 7
0
def test_add_remove_mutates_not_replaces():
    """Check that a snapshot of ``_globals`` has no callbacks before and after use.

    The snapshot is taken with ``copy()`` before the ``Callback`` context is
    entered, and its ``'callbacks'`` entry is inspected on both sides of it.
    """
    snapshot = _globals.copy()
    assert not snapshot['callbacks']

    with Callback():
        # Entering and leaving the context is all that is exercised here.
        pass

    assert not snapshot['callbacks']
Exemplo n.º 8
0
def test_compute_no_opt():
    """Check that ``optimize_graph=False`` keeps Bag from fusing tasks.

    Bag applies ``fuse`` by default. The keys that actually get computed are
    recorded through a ``pretask`` callback and compared between a run with
    the optimization disabled and a run with default settings.
    """
    from dask.callbacks import Callback

    bag = db.from_sequence(range(100), npartitions=4)
    add1 = tz.partial(add, 1)
    mul2 = tz.partial(mul, 2)
    pipeline = bag.map(add1).map(mul2)

    def traced_keys(**compute_kwargs):
        # Compute the pipeline with ``dask.get`` and return every key seen.
        seen = []
        with Callback(pretask=lambda key, *args: seen.append(key)):
            pipeline.compute(get=dask.get, **compute_kwargs)
        return seen

    def count_matching(seen, fragment):
        # How many recorded keys contain ``fragment`` in their first element.
        return sum(1 for key in seen if fragment in key[0])

    # With the kwarg the optimization must not happen.
    unfused = traced_keys(optimize_graph=False)
    assert count_matching(unfused, 'mul') == 4
    assert count_matching(unfused, 'add') == 4

    # Without the kwarg the optimization must happen.
    fused = traced_keys()
    assert count_matching(fused, 'mul') == 4
    assert count_matching(fused, 'add') == 0
Exemplo n.º 9
0
def cachecontext(cache: Union[str, Cache, None] = DEFAULT_CACHE_PATH) -> Callback:
    """Return a context manager that caches function node results and values.

    Parameters
    ----------
    cache: Union[str, Cache, None]
        A `str` is treated as a file system directory and wrapped in a
        `LockedCache`; the directory is created if it does not exist.
        A `Cache` instance is used directly as the cache storage instead of
        the default file system method.
        `None` disables caching.

    Returns
    -------
    Callback
        a context manager that handles caching of function results and values.
        NOTE(review): when caching is enabled the object returned comes from
        ``dask.config.set`` -- confirm it satisfies the ``Callback``
        annotation.
    """
    if cache is None:
        # Caching disabled: hand back a callback with no hooks installed.
        return Callback()

    # A path string selects the file system backed cache; anything else is
    # used as the storage object as-is.
    storage = LockedCache(cache) if isinstance(cache, str) else cache
    optimizer = delayedoptimize(cache=storage)
    return dask.config.set(delayed_optimize=optimizer)