Exemplo n.º 1
0
def test_make_meta_backends(index):
    """Check that empty and non-empty metadata keep the dtypes of a cudf frame.

    A three-row ``cudf.DataFrame`` is built with integer, float, string,
    categorical and datetime (s/ms/ns) columns, then re-indexed with
    ``set_index(index)``. The dtypes reported by ``make_meta`` and
    ``meta_nonempty`` must agree with the frame; unless the resulting index
    is a ``cudf.MultiIndex``, the same checks are repeated on the collection
    returned by ``dgd.from_cudf``.

    Parameters
    ----------
    index
        Value forwarded unchanged to ``DataFrame.set_index`` (presumably
        supplied by a pytest parametrization -- not visible here).
    """
    # Numeric columns: one per dtype name, each holding 0, 1, 2.
    numeric_columns = {}
    for type_name in ("int8", "int32", "int64", "float64"):
        numeric_columns[type_name] = np.arange(
            start=0, stop=3, dtype=type_name
        )
    frame = cudf.DataFrame(numeric_columns)

    # String and categorical columns.
    frame["strings"] = ["cat", "dog", "fish"]
    frame["cats"] = frame["strings"].astype("category")

    # Datetime columns at three resolutions, derived from the seconds one.
    days = ["2018-10-07", "2018-10-08", "2018-10-09"]
    frame["time_s"] = np.array(days, dtype="datetime64[s]")
    frame["time_ms"] = frame["time_s"].astype("datetime64[ms]")
    frame["time_ns"] = frame["time_s"].astype("datetime64[ns]")

    frame = frame.set_index(index)

    # "Empty" metadata must report the same dtypes as the data.
    empty_meta = make_meta(frame)
    dd.assert_eq(empty_meta.dtypes, frame.dtypes)

    # "Non-empty" metadata must agree with the empty metadata.
    nonempty_meta = meta_nonempty(frame)
    dd.assert_eq(empty_meta.dtypes, nonempty_meta.dtypes)

    # The dask code path is only exercised when the index is not a MultiIndex.
    if isinstance(frame.index, cudf.MultiIndex):
        return

    collection = dgd.from_cudf(frame, npartitions=1)

    # Same two checks, this time on the collection's own metadata.
    dd.assert_eq(collection._meta.dtypes, frame.dtypes)
    dd.assert_eq(collection._meta.dtypes, collection._meta_nonempty.dtypes)
Exemplo n.º 2
0
def make_meta_object(x, index=None):
    """Build a cudf metadata object (or scalar) describing ``x``.

    The input is dispatched in this order:

    1. An object exposing ``_meta`` returns that attribute as-is.
    2. An array-like with a non-empty ``shape`` returns ``x[:0]``.
    3. A ``dict`` of ``{name: dtype}`` returns a ``cudf.DataFrame``.
    4. A 2-tuple ``(name, dtype)`` returns the result of ``_empty_series``.
    5. Any other list/tuple must contain only ``(name, dtype)`` 2-tuples and
       returns a ``cudf.DataFrame`` with columns in the given order.
    6. Anything else without a ``dtype`` attribute (and not ``None``) is
       tried as a NumPy dtype specification and, on success, converted with
       ``_scalar_from_dtype``.
    7. A scalar is passed through ``_nonempty_scalar``.

    Parameters
    ----------
    x : dict, tuple, list, cudf.Series, cudf.DataFrame, cudf.Index,
        dtype, scalar
        Description of the desired metadata, interpreted as listed above.
    index : cudf.Index, optional
        Index to attach to the metadata. When given, it is first converted
        with ``make_meta``; when omitted, ``None`` is forwarded to the cudf
        constructors (presumably yielding a default ``RangeIndex`` --
        confirm against cudf).

    Returns
    -------
    object
        The metadata object or scalar produced by the matching branch.

    Raises
    ------
    ValueError
        If ``x`` is a list/tuple (other than a 2-tuple) whose items are not
        all 2-tuples.
    TypeError
        If no branch is able to handle ``x``.

    Examples
    --------
    >>> make_meta([('a', 'i8'), ('b', 'O')])
    Empty DataFrame
    Columns: [a, b]
    Index: []
    >>> make_meta(('a', 'f8'))
    Series([], Name: a, dtype: float64)
    >>> make_meta('i8')
    1
    """
    # Objects that already carry metadata, and sliceable array-likes, are
    # handled before the index is touched.
    if hasattr(x, "_meta"):
        return x._meta
    if is_arraylike(x) and x.shape:
        return x[:0]

    meta_index = None if index is None else make_meta(index)

    # Mapping of column name -> dtype.
    if isinstance(x, dict):
        series_by_name = {
            name: _empty_series(name, dtype, index=meta_index)
            for name, dtype in x.items()
        }
        return cudf.DataFrame(series_by_name, index=meta_index)

    # A lone (name, dtype) pair describes a single series.
    if isinstance(x, tuple) and len(x) == 2:
        return _empty_series(x[0], x[1], index=meta_index)

    # Any other list/tuple must be a sequence of (name, dtype) pairs.
    if isinstance(x, (list, tuple)):
        for entry in x:
            if not (isinstance(entry, tuple) and len(entry) == 2):
                raise ValueError(
                    f"Expected iterable of tuples of (name, dtype), got {x}")
        series_by_name = {
            name: _empty_series(name, dtype, index=meta_index)
            for name, dtype in x
        }
        ordered_names = [name for name, _ in x]
        return cudf.DataFrame(
            series_by_name,
            columns=ordered_names,
            index=meta_index,
        )

    # Could be a string, a dtype object, or a python type. `None` is skipped
    # because it is implicitly converted to `dtype('f8')`, which is not
    # wanted here.
    if x is not None and not hasattr(x, "dtype"):
        try:
            return _scalar_from_dtype(np.dtype(x))
        except Exception:
            # Not a usable dtype specification; fall through to the scalar
            # check below.
            pass

    if is_scalar(x):
        return _nonempty_scalar(x)

    raise TypeError(f"Don't know how to create metadata from {x}")