Example #1
import functools

from dask.utils import has_keyword


def test_has_keyword():
    def foo(a, b, c=None):
        pass
    assert has_keyword(foo, 'a')
    assert has_keyword(foo, 'b')
    assert has_keyword(foo, 'c')

    bar = functools.partial(foo, a=1)
    assert has_keyword(bar, 'b')
    assert has_keyword(bar, 'c')
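
For reference, has_keyword(func, keyword) simply reports whether func can accept the given keyword argument. A minimal sketch of the idea, assuming only the standard library (has_keyword_sketch is a hypothetical stand-in; the real implementation lives in dask.utils and is similarly thin):

import inspect


def has_keyword_sketch(func, keyword):
    # Hypothetical re-implementation for illustration only.
    # inspect.signature() also understands functools.partial objects,
    # which is why the assertions on ``bar`` above hold.
    try:
        return keyword in inspect.signature(func).parameters
    except (TypeError, ValueError):
        return False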
Example #2
    def apply(
        self,
        func,
        raw=None,
        engine="cython",
        engine_kwargs=None,
        args=None,
        kwargs=None,
    ):
        compat_kwargs = {}
        kwargs = kwargs or {}
        args = args or ()
        meta = self.obj._meta.rolling(0)
        if has_keyword(meta.apply, "engine"):
            # PANDAS_GT_100
            compat_kwargs = dict(engine=engine, engine_kwargs=engine_kwargs)
        if raw is None:
            # PANDAS_GT_100: the default changed from None to False;
            # read it off the pandas signature instead of hard-coding it
            raw = inspect.signature(meta.apply).parameters["raw"].default

        return self._call_method(
            "apply", func, raw=raw, args=args, kwargs=kwargs, **compat_kwargs
        )
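
The snippet probes the underlying pandas method at runtime instead of comparing version strings. The same probing pattern in isolation, as a minimal sketch (old_apply and new_apply are hypothetical stand-ins for the two pandas signatures):

import inspect

from dask.utils import has_keyword


def old_apply(func, raw=None):
    pass  # hypothetical pre-1.0-style signature


def new_apply(func, raw=False, engine="cython", engine_kwargs=None):
    pass  # hypothetical 1.0+-style signature


assert not has_keyword(old_apply, "engine")
assert has_keyword(new_apply, "engine")
# Read the callee's own default rather than hard-coding it:
assert inspect.signature(new_apply).parameters["raw"].default is False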
Example #3
import contextlib
import warnings

import numpy as np

from dask.array.utils import meta_from_array
from dask.base import is_arraylike
from dask.utils import has_keyword


def compute_meta(func, _dtype, *args, **kwargs):
    with np.errstate(all="ignore"), warnings.catch_warnings():
        warnings.simplefilter("ignore", category=RuntimeWarning)

        args_meta = [meta_from_array(x) if is_arraylike(x) else x for x in args]
        kwargs_meta = {
            k: meta_from_array(v) if is_arraylike(v) else v for k, v in kwargs.items()
        }

        # TODO: look for an alternative to this; it causes issues when using
        # map_blocks() with np.vectorize, e.g. dask.array.routines._isnonzero_vec().
        if isinstance(func, np.vectorize):
            meta = func(*args_meta)
        else:
            try:
                # some reduction functions need to know they are computing meta
                if has_keyword(func, "computing_meta"):
                    kwargs_meta["computing_meta"] = True
                meta = func(*args_meta, **kwargs_meta)
            except TypeError as e:
                if any(
                    s in str(e)
                    for s in [
                        "unexpected keyword argument",
                        "is an invalid keyword for",
                        "Did not understand the following kwargs",
                    ]
                ):
                    raise
                else:
                    return None
            except ValueError as e:
                # Reductions like min/max have no identity value; when there
                # is a single input, fall back to that input's meta.
                if (
                    len(args_meta) == 1
                    and "zero-size array to reduction operation" in str(e)
                ):
                    meta = args_meta[0]
                else:
                    return None
            except Exception:
                return None

        if _dtype and getattr(meta, "dtype", None) != _dtype:
            with contextlib.suppress(AttributeError):
                meta = meta.astype(_dtype)

        if np.isscalar(meta):
            meta = np.array(meta)

        return meta
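
computing_meta is an opt-in handshake: compute_meta only injects the flag when the reduction's signature declares it. A hypothetical reduction on the receiving end might look like this:

import numpy as np


def my_reduction(x, axis=None, keepdims=False, computing_meta=False):
    # Hypothetical example. When compute_meta() probes this function it
    # passes computing_meta=True together with zero-size meta arrays, so
    # we return a cheap, correctly-typed stub instead of doing real work.
    if computing_meta:
        return np.empty((0,), dtype=x.dtype)
    return x.sum(axis=axis, keepdims=keepdims)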
Example #4
def _create_task(func, smaller_src_arrays, src_block_info, dst_arrays,
                 dst_block_info, position, fill_value, kwargs):
    """Create a task for resample_blocks."""
    from functools import partial

    from dask.utils import has_keyword
    dependencies = []
    args = []
    for smaller_data in smaller_src_arrays:
        args.append((smaller_data.name, *([0] * smaller_data.ndim)))
        dependencies.append(smaller_data)
    for dst_array in dst_arrays:
        dst_position = [0] * (dst_array.ndim - 2) + list(position[-2:])
        args.append((dst_array.name, *dst_position))
    func_kwargs = kwargs.copy()
    func_kwargs['fill_value'] = fill_value
    if has_keyword(func, "block_info"):
        func_kwargs["block_info"] = {0: src_block_info, None: dst_block_info}
    pfunc = partial(func, **func_kwargs)
    task = (pfunc, *args)
    return task, dependencies
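
block_info works the same way as the earlier computing_meta flag: the kernel only receives it because its signature asks for it, with key 0 describing the source blocks and key None the destination. A hypothetical kernel compatible with _create_task:

from dask.utils import has_keyword


def resample_kernel(*blocks, fill_value=None, block_info=None):
    # Hypothetical kernel. ``block_info`` is injected by _create_task only
    # because this signature declares it: key 0 holds the source block
    # metadata, key None the destination block metadata.
    if block_info is not None:
        assert set(block_info) == {0, None}
    return blocks[0]


assert has_keyword(resample_kernel, "block_info")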
Example #5
def map_overlap(
    func,
    df,
    before,
    after,
    *args,
    meta=no_default,
    enforce_metadata=True,
    transform_divisions=True,
    align_dataframes=True,
    **kwargs,
):
    """Apply a function to each partition, sharing rows with adjacent partitions.

    Parameters
    ----------
    func : function
        The function applied to each partition. If this function accepts
        the special ``partition_info`` keyword argument, it will receive
        information on the partition's relative location within the
        dataframe.
    df : dd.DataFrame, dd.Series
    args, kwargs :
        Positional and keyword arguments to pass to the function.
        Positional arguments are computed on a per-partition basis, while
        keyword arguments are shared across all partitions. The partition
        itself will be the first positional argument, with all other
        arguments passed *after*. Arguments can be ``Scalar``, ``Delayed``,
        or regular Python objects. DataFrame-like args (both dask and
        pandas) will be repartitioned to align (if necessary) before
        applying the function; see ``align_dataframes`` to control this
        behavior.
    enforce_metadata : bool, default True
        Whether to enforce at runtime that the structure of the DataFrame
        produced by ``func`` actually matches the structure of ``meta``.
        This will rename and reorder columns for each partition,
        and will raise an error if this doesn't work or types don't match.
    before : int or timedelta
        The rows to prepend to partition ``i`` from the end of
        partition ``i - 1``.
    after : int or timedelta
        The rows to append to partition ``i`` from the beginning
        of partition ``i + 1``.
    transform_divisions : bool, default True
        Whether to apply the function onto the divisions and apply those
        transformed divisions to the output.
    align_dataframes : bool, default True
        Whether to repartition DataFrame- or Series-like args
        (both dask and pandas) so their divisions align before applying
        the function. This requires all inputs to have known divisions.
        Single-partition inputs will be split into multiple partitions.

        If False, all inputs must have either the same number of partitions
        or a single partition. Single-partition inputs will be broadcast to
        every partition of multi-partition inputs.
    $META

    See Also
    --------
    dd.DataFrame.map_overlap
    """
    args = (df,) + args

    dfs = [df for df in args if isinstance(df, _Frame)]

    if isinstance(before, datetime.timedelta) or isinstance(
        after, datetime.timedelta
    ):
        if not is_datetime64_any_dtype(dfs[0].index._meta_nonempty.inferred_type):
            raise TypeError(
                "Must have a `DatetimeIndex` when using string offset "
                "for `before` and `after`"
            )
    else:
        if not (
            isinstance(before, Integral)
            and before >= 0
            and isinstance(after, Integral)
            and after >= 0
        ):
            raise ValueError("before and after must be positive integers")

    name = kwargs.pop("token", None)
    parent_meta = kwargs.pop("parent_meta", None)

    assert callable(func)
    if name is not None:
        token = tokenize(meta, before, after, *args, **kwargs)
    else:
        name = "overlap-" + funcname(func)
        token = tokenize(func, meta, before, after, *args, **kwargs)
    name = f"{name}-{token}"

    if align_dataframes:
        args = _maybe_from_pandas(args)
        try:
            args = _maybe_align_partitions(args)
        except ValueError as e:
            raise ValueError(
                f"{e}. If you don't want the partitions to be aligned, and are "
                "calling `map_overlap` directly, pass `align_dataframes=False`."
            ) from e

    meta = _get_meta_map_partitions(args, dfs, func, kwargs, meta, parent_meta)

    if all(isinstance(arg, Scalar) for arg in args):
        layer = {
            (name, 0): (
                apply,
                func,
                (tuple, [(arg._name, 0) for arg in args]),
                kwargs,
            )
        }
        graph = HighLevelGraph.from_collections(name, layer, dependencies=args)
        return Scalar(graph, name, meta)

    args2 = []
    dependencies = []

    divisions = _get_divisions_map_partitions(
        align_dataframes, transform_divisions, dfs, func, args, kwargs
    )

    def _handle_frame_argument(arg):
        dsk = {}

        prevs_parts_dsk, prevs = _get_previous_partitions(arg, before)
        dsk.update(prevs_parts_dsk)

        nexts_parts_dsk, nexts = _get_nexts_partitions(arg, after)
        dsk.update(nexts_parts_dsk)

        name_a = "overlap-concat-" + tokenize(arg)
        for i, (prev, current, next) in enumerate(
            zip(prevs, arg.__dask_keys__(), nexts)
        ):
            key = (name_a, i)
            dsk[key] = (_combined_parts, prev, current, next, before, after)

        graph = HighLevelGraph.from_collections(name_a, dsk, dependencies=[arg])
        return new_dd_object(graph, name_a, meta, divisions)

    for arg in args:
        if isinstance(arg, _Frame):
            arg = _handle_frame_argument(arg)
            args2.append(arg)
            dependencies.append(arg)
            continue
        arg = normalize_arg(arg)
        arg2, collections = unpack_collections(arg)
        if collections:
            args2.append(arg2)
            dependencies.extend(collections)
        else:
            args2.append(arg)

    kwargs3 = {}
    simple = True
    for k, v in kwargs.items():
        v = normalize_arg(v)
        v, collections = unpack_collections(v)
        dependencies.extend(collections)
        kwargs3[k] = v
        if collections:
            simple = False

    if has_keyword(func, "partition_info"):
        partition_info = {
            (i,): {"number": i, "division": division}
            for i, division in enumerate(divisions[:-1])
        }

        args2.insert(0, BlockwiseDepDict(partition_info))
        orig_func = func

        def func(partition_info, *args, **kwargs):
            return orig_func(*args, **kwargs, partition_info=partition_info)

    if enforce_metadata:
        dsk = partitionwise_graph(
            apply_and_enforce,
            name,
            func,
            before,
            after,
            *args2,
            dependencies=dependencies,
            _func=overlap_chunk,
            _meta=meta,
            **kwargs3,
        )
    else:
        kwargs4 = kwargs if simple else kwargs3
        dsk = partitionwise_graph(
            overlap_chunk,
            name,
            func,
            before,
            after,
            *args2,
            **kwargs4,
            dependencies=dependencies,
        )

    graph = HighLevelGraph.from_collections(name, dsk, dependencies=dependencies)
    return new_dd_object(graph, name, meta, divisions)
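
To round off the example, here is a minimal, hypothetical use of the partition_info hook through the public DataFrame.map_overlap method (which calls the free function above under the hood); shifted_diff is an illustrative name, not part of dask:

import pandas as pd

import dask.dataframe as dd


def shifted_diff(part, partition_info=None):
    # Hypothetical user function. At run time partition_info is
    # {'number': i, 'division': ...}; it is None while dask infers meta.
    return part.x - part.x.shift(1)


ddf = dd.from_pandas(pd.DataFrame({"x": range(10)}), npartitions=3)
# before=1 shares one trailing row of each partition with its successor,
# so the shift(1) sees the row it needs.
result = ddf.map_overlap(shifted_diff, 1, 0).compute()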