def test_make_meta_backends(index):
    """Verify that metadata helpers preserve the dtypes of a cudf frame.

    A small three-row ``cudf.DataFrame`` is assembled with integer, float,
    string, categorical and datetime (s / ms / ns) columns, then re-indexed
    with ``index``.  The dtypes reported by ``make_meta`` and
    ``meta_nonempty`` must agree with the frame's own dtypes.  When the
    resulting index is not a ``cudf.MultiIndex`` the same checks are
    repeated on the collection built by ``dgd.from_cudf``.

    Parameters
    ----------
    index
        Forwarded unchanged to ``DataFrame.set_index``; presumably supplied
        by a parametrized fixture defined outside this block.
    """
    numeric_dtypes = ["int8", "int32", "int64", "float64"]

    # One numeric column per dtype, each named after the dtype itself.
    numeric_columns = {}
    for dtype_name in numeric_dtypes:
        numeric_columns[dtype_name] = np.arange(
            start=0, stop=3, dtype=dtype_name
        )
    gdf = cudf.DataFrame(numeric_columns)

    # Non-numeric columns: strings, categoricals and three datetime units.
    gdf["strings"] = ["cat", "dog", "fish"]
    gdf["cats"] = gdf["strings"].astype("category")
    gdf["time_s"] = np.array(
        ["2018-10-07", "2018-10-08", "2018-10-09"], dtype="datetime64[s]"
    )
    gdf["time_ms"] = gdf["time_s"].astype("datetime64[ms]")
    gdf["time_ns"] = gdf["time_s"].astype("datetime64[ns]")
    gdf = gdf.set_index(index)

    # The "empty" metadata must report the same dtypes as the data.
    empty_meta = make_meta(gdf)
    dd.assert_eq(empty_meta.dtypes, gdf.dtypes)

    # The "non-empty" metadata must agree with the empty metadata.
    nonempty_meta = meta_nonempty(gdf)
    dd.assert_eq(empty_meta.dtypes, nonempty_meta.dtypes)

    # The collection-level checks are skipped for MultiIndex frames.
    if isinstance(gdf.index, cudf.MultiIndex):
        return

    ddf = dgd.from_cudf(gdf, npartitions=1)
    dd.assert_eq(ddf._meta.dtypes, gdf.dtypes)
    dd.assert_eq(ddf._meta.dtypes, ddf._meta_nonempty.dtypes)
def make_meta_object(x, index=None):
    """Build an empty cudf object that carries the requested metadata.

    Parameters
    ----------
    x : dict, tuple, list, cudf.Series, cudf.DataFrame, cudf.Index, dtype, scalar
        Description of the desired metadata:

        * an object exposing ``_meta`` -- its ``_meta`` is returned as is;
        * an array-like with a non-empty ``shape`` -- ``x[:0]`` is returned;
        * a ``dict`` of ``{name: dtype}`` or an iterable of
          ``(name, dtype)`` tuples -- an empty ``DataFrame`` is built;
        * a single ``(name, dtype)`` tuple -- an empty ``Series`` is built;
        * a dtype-like value or a scalar -- a scalar of the matching dtype
          is returned.
    index : cudf.Index, optional
        Index to attach to the metadata.  It is first converted with
        ``make_meta``.  If omitted, no index is passed to the constructors
        (presumably yielding a ``RangeIndex``).

    Returns
    -------
    object
        The metadata object produced by the first matching rule above.

    Raises
    ------
    ValueError
        If ``x`` is a list or tuple whose items are not all 2-tuples.
    TypeError
        If no rule can produce metadata from ``x``.

    Examples
    --------
    >>> make_meta([('a', 'i8'), ('b', 'O')])
    Empty DataFrame
    Columns: [a, b]
    Index: []
    >>> make_meta(('a', 'f8'))
    Series([], Name: a, dtype: float64)
    >>> make_meta('i8')
    1
    """
    # Inputs that already carry (or trivially yield) their own metadata.
    if hasattr(x, "_meta"):
        return x._meta
    if is_arraylike(x) and x.shape:
        return x[:0]

    if index is not None:
        index = make_meta(index)

    # Mapping of column name -> dtype.
    if isinstance(x, dict):
        empty_columns = {
            name: _empty_series(name, dtype, index=index)
            for (name, dtype) in x.items()
        }
        return cudf.DataFrame(empty_columns, index=index)

    # A lone (name, dtype) pair describes a Series.
    if isinstance(x, tuple) and len(x) == 2:
        series_name = x[0]
        series_dtype = x[1]
        return _empty_series(series_name, series_dtype, index=index)

    # Any other list/tuple must be a sequence of (name, dtype) pairs.
    if isinstance(x, (list, tuple)):
        for item in x:
            if not (isinstance(item, tuple) and len(item) == 2):
                raise ValueError(
                    f"Expected iterable of tuples of (name, dtype), got {x}")
        empty_columns = {
            name: _empty_series(name, dtype, index=index)
            for (name, dtype) in x
        }
        column_order = [name for name, dtype in x]
        return cudf.DataFrame(
            empty_columns,
            columns=column_order,
            index=index,
        )

    if x is not None and not hasattr(x, "dtype"):
        # May be a string, a dtype object, or a python type. `None` is
        # excluded because it is implicitly converted to `dtype('f8')`,
        # which is not wanted here.
        try:
            return _scalar_from_dtype(np.dtype(x))
        except Exception:
            # Not dtype-like after all; fall through to the scalar check.
            pass

    if is_scalar(x):
        return _nonempty_scalar(x)

    raise TypeError(f"Don't know how to create metadata from {x}")