示例#1
0
    def _setitem_tuple_arg(self, key, value):
        """Assign ``value`` at ``df.loc[key[0], key[1]]``.

        ``key`` is a (row-indexer, column-indexer) tuple. When the column
        selection resolves to existing columns, assignment is delegated to
        each column's own ``.loc``; otherwise a brand-new column is built,
        aligned to the frame's index, and inserted.

        Raises
        ------
        NotImplementedError
            If the frame's rows or columns use a MultiIndex.
        """
        # MultiIndex assignment is unsupported on either axis.
        if isinstance(self._df.index, cudf.MultiIndex) or isinstance(
                self._df.columns, pd.MultiIndex):
            raise NotImplementedError(
                "Setting values using df.loc[] not supported on "
                "DataFrames with a MultiIndex")

        try:
            columns = self._get_column_selection(key[1])
        except KeyError:
            # key[1] names a column that does not exist yet: create it.
            # First work out the row index the new values should carry.
            if not self._df.empty and isinstance(key[0], slice):
                # Translate the label slice into positions on the index.
                pos_range = get_label_range_or_mask(self._df.index,
                                                    key[0].start, key[0].stop,
                                                    key[0].step)
                idx = self._df.index[pos_range]
            elif self._df.empty and isinstance(key[0], slice):
                # Slicing an empty frame: no index to derive; decided below.
                idx = None
            else:
                idx = cudf.Index(key[0])
            if is_scalar(value):
                # Broadcast the scalar to the target length (1 if unknown).
                length = len(idx) if idx is not None else 1
                value = as_column(value, length=length)

            new_col = cudf.Series(value, index=idx)
            if not self._df.empty:
                # Reorder/reindex the new values onto the frame's own index.
                new_col = new_col._align_to_index(self._df.index, how="right")

            if self._df.empty:
                # Empty frame adopts the new column's index (or a default).
                self._df.index = (idx if idx is not None else cudf.RangeIndex(
                    len(new_col)))
            self._df._data.insert(key[1], new_col)
        else:
            # Existing column(s): delegate row-wise assignment per column.
            for col in columns:
                self._df[col].loc[key[0]] = value
示例#2
0
 def __init__(
     self,
     data: Union[gpd.GeoSeries],
     index: Union[cudf.Index, pd.Index] = None,
     dtype=None,
     name=None,
     nan_as_null=True,
 ):
     """Construct a GeoSeries from geometry-aware input.

     ``data`` may be a geopandas GeoSeries, another GeoSeries, a raw
     GeoColumn, or a pandas Series of geometry; any other type raises
     ``TypeError``.
     """
     # Derive an index: prefer one supplied by the caller, then one
     # carried by GeoSeries-like input, then a default RangeIndex.
     if index is None and isinstance(data, (gpGeoSeries, GeoSeries)):
         index = data.index
     if index is None:
         index = cudf.RangeIndex(0, len(data))
     # Promote plain pandas input to a geopandas GeoSeries first, so the
     # dispatch below only deals with geometry-aware containers.
     if isinstance(data, pd.Series):
         data = gpGeoSeries(data)
     # Obtain the backing GeoColumn for whichever container we received.
     if isinstance(data, GeoColumn):
         col = data
     elif isinstance(data, GeoSeries):
         col = data._column
     elif isinstance(data, gpGeoSeries):
         # Host-side geopandas data: copy its buffers into arrow layout.
         host = GeoPandasAdapter(data)
         col = GeoColumn(
             GeoArrowBuffers(host.get_geoarrow_host_buffers()),
             GeoMeta(host.get_geopandas_meta()),
         )
     else:
         raise TypeError(
             f"Incompatible object passed to GeoSeries ctor {type(data)}"
         )
     super().__init__(col, index, dtype, name, nan_as_null)
示例#3
0
def test_class_new_interpolation():
    # Three identical curves, each sampled at the knots t = 0..4.
    t = cudf.Series(np.tile(np.arange(5), 3)).astype("float32")
    y = cudf.Series([3, 2, 3, 4, 3] * 3).astype("float32")
    prefix_sum = cudf.Series(cp.arange(4) * 5).astype("int32")
    curve = cuspatial.CubicSpline(t, y, prefixes=prefix_sum)

    # Re-sample each curve on a finer 9-point grid over the same span.
    new_samples = cudf.Series(np.tile(np.linspace(0, 4, 9), 3)).astype(
        "float32")
    new_x = cudf.Series(np.repeat(np.arange(0, 3), 9)).astype("int32")
    old_x = cudf.Series(np.repeat(np.arange(0, 3), 5)).astype("int32")
    new_points = curve(new_samples, groups=new_x)
    old_points = curve(t, groups=old_x)

    # The fine grid hits every original knot; values interpolated there
    # must agree with the spline evaluated at the knots themselves.
    at_knots = new_points[0, 2, 4, 6, 8, 9, 11, 13, 15, 17,
                          18, 20, 22, 24, 26]
    at_knots.index = cudf.RangeIndex(0, len(at_knots))
    assert_eq(at_knots, old_points)
示例#4
0
def make_meta(x):
    """Create an empty cudf object containing the desired metadata.

    Parameters
    ----------
    x : dict, tuple, list, pd.Series, pd.DataFrame, pd.Index, dtype, scalar
        To create a DataFrame, provide a `dict` mapping of `{name: dtype}`, or
        an iterable of `(name, dtype)` tuples. To create a `Series`, provide a
        tuple of `(name, dtype)`. If a cudf object, names, dtypes, and index
        should match the desired output. If a dtype or scalar, a scalar of the
        same dtype is returned.

    Examples
    --------
    >>> make_meta([('a', 'i8'), ('b', 'O')])
    Empty DataFrame
    Columns: [a, b]
    Index: []
    >>> make_meta(('a', 'f8'))
    Series([], Name: a, dtype: float64)
    >>> make_meta('i8')
    1
    """
    # Dask collections already carry their metadata with them.
    if hasattr(x, "_meta"):
        return x._meta
    # cudf objects: a tiny slice already has the right schema.
    if isinstance(x, (cudf.Series, cudf.DataFrame, cudf.Index)):
        head = x[:2]
        return head.copy() if hasattr(head, "copy") else head

    # Fall back to dask's pandas-based meta construction.
    pd_meta = dd.utils.make_meta(x)

    if not isinstance(pd_meta, (pd.DataFrame, pd.Series, pd.Index)):
        # dtype/scalar case: dask already produced the right thing.
        return pd_meta

    # Convert the pandas meta to the corresponding cudf container.
    nonempty = dd.utils.meta_nonempty(pd_meta)
    if isinstance(nonempty, pd.DataFrame):
        return cudf.DataFrame.from_pandas(nonempty)
    if isinstance(nonempty, pd.Series):
        return cudf.Series(nonempty)
    if isinstance(nonempty, pd.RangeIndex):
        return cudf.RangeIndex(nonempty.start, nonempty.stop)
    return cudf.dataframe.GenericIndex(nonempty)
示例#5
0
def concat(objs, axis=0, join="outer", ignore_index=False, sort=None):
    """Concatenate DataFrames, Series, or Indices row-wise.

    Parameters
    ----------
    objs : list of DataFrame, Series, or Index
    axis : {0/'index', 1/'columns'}, default 0
        The axis to concatenate along.
    join : {'inner', 'outer'}, default 'outer'
        How to handle indexes on other axis (or axes).
    ignore_index : bool, default False
        Set True to ignore the index of the *objs* and provide a
        default range index instead.
    sort : bool, default False
        Sort non-concatenation axis if it is not already aligned.

    Returns
    -------
    A new object of like type with rows from each object in ``objs``.

    Examples
    --------
    Combine two ``Series``.

    >>> import cudf
    >>> s1 = cudf.Series(['a', 'b'])
    >>> s2 = cudf.Series(['c', 'd'])
    >>> s1
    0    a
    1    b
    dtype: object
    >>> s2
    0    c
    1    d
    dtype: object
    >>> cudf.concat([s1, s2])
    0    a
    1    b
    0    c
    1    d
    dtype: object

    Clear the existing index and reset it in the
    result by setting the ``ignore_index`` option to ``True``.

    >>> cudf.concat([s1, s2], ignore_index=True)
    0    a
    1    b
    2    c
    3    d
    dtype: object

    Combine two DataFrame objects with identical columns.

    >>> df1 = cudf.DataFrame([['a', 1], ['b', 2]],
    ...                    columns=['letter', 'number'])
    >>> df1
      letter  number
    0      a       1
    1      b       2
    >>> df2 = cudf.DataFrame([['c', 3], ['d', 4]],
    ...                    columns=['letter', 'number'])
    >>> df2
      letter  number
    0      c       3
    1      d       4
    >>> cudf.concat([df1, df2])
      letter  number
    0      a       1
    1      b       2
    0      c       3
    1      d       4

    Combine DataFrame objects with overlapping columns and return
    everything. Columns outside the intersection will
    be filled with ``null`` values.

    >>> df3 = cudf.DataFrame([['c', 3, 'cat'], ['d', 4, 'dog']],
    ...                    columns=['letter', 'number', 'animal'])
    >>> df3
      letter  number animal
    0      c       3    cat
    1      d       4    dog
    >>> cudf.concat([df1, df3], sort=False)
      letter  number animal
    0      a       1   <NA>
    1      b       2   <NA>
    0      c       3    cat
    1      d       4    dog

    Combine ``DataFrame`` objects with overlapping columns
    and return only those that are shared by passing ``inner`` to
    the ``join`` keyword argument.

    >>> cudf.concat([df1, df3], join="inner")
      letter  number
    0      a       1
    1      b       2
    0      c       3
    1      d       4

    Combine ``DataFrame`` objects horizontally along the
    x axis by passing in ``axis=1``.

    >>> df4 = cudf.DataFrame([['bird', 'polly'], ['monkey', 'george']],
    ...                    columns=['animal', 'name'])
    >>> df4
       animal    name
    0    bird   polly
    1  monkey  george
    >>> cudf.concat([df1, df4], axis=1)
      letter  number  animal    name
    0      a       1    bird   polly
    1      b       2  monkey  george
    """

    if not objs:
        raise ValueError("No objects to concatenate")

    objs = [obj for obj in objs if obj is not None]

    if len(objs) == 0:
        raise ValueError("All objects passed were None")

    # Normalize `axis` to its integer form (0 or 1) BEFORE the
    # single-object fast path below, which compares `axis == 1` /
    # `axis == 0` directly; report the caller's original value on error
    # (previously the message printed the mapped value, always None).
    param_axis = _axis_map.get(axis, None)
    if param_axis is None:
        raise ValueError(
            f'`axis` must be 0 / "index" or 1 / "columns", got: {axis}'
        )
    axis = param_axis

    # Return for single object
    if len(objs) == 1:
        if ignore_index:
            if axis == 1:
                result = cudf.DataFrame(
                    data=objs[0]._data.copy(deep=True),
                    index=objs[0].index.copy(deep=True),
                )
                # TODO: Move following columns setting into
                # above constructor after following issue is fixed:
                # https://github.com/rapidsai/cudf/issues/6821
                result.columns = pd.RangeIndex(len(objs[0]._data.names))
            else:
                result = cudf.DataFrame(
                    data=objs[0]._data.copy(deep=True),
                    index=cudf.RangeIndex(len(objs[0])),
                )
        else:
            result = objs[0].copy()
        if sort:
            if axis == 0:
                return result.sort_index()
            if result.columns.is_monotonic:
                # Columns are already sorted; nothing more to do.  (The
                # previous code fell through here without returning.)
                return result
            # TODO: Sorting by columns can be done
            # once the following issue is fixed:
            # https://github.com/rapidsai/cudf/issues/6821
            raise NotImplementedError(
                "Sorting by columns is not yet supported"
            )
        return result

    # Retrieve the base types of `objs`. In order to support sub-types
    # and object wrappers, we use `isinstance()` instead of comparing
    # types directly
    typs = set()
    for o in objs:
        if isinstance(o, cudf.MultiIndex):
            typs.add(cudf.MultiIndex)
        # `elif` (not `if`): a MultiIndex is also a cudf.Index, and
        # adding both types would wrongly trip the mixed-types error
        # further down.
        elif issubclass(type(o), cudf.Index):
            typs.add(type(o))
        elif isinstance(o, cudf.DataFrame):
            typs.add(cudf.DataFrame)
        elif isinstance(o, cudf.Series):
            typs.add(cudf.Series)
        else:
            raise TypeError(f"cannot concatenate object of type {type(o)}")

    allowed_typs = {cudf.Series, cudf.DataFrame}

    # when axis is 1 (column) we can concat with Series and Dataframes
    if axis == 1:
        if not typs.issubset(allowed_typs):
            raise TypeError(
                "Can only concatenate Series and DataFrame objects when axis=1"
            )
        df = cudf.DataFrame()
        _normalize_series_and_dataframe(objs, axis=axis)

        old_objs = objs
        objs = [obj for obj in objs if obj.shape != (0, 0)]
        if len(objs) == 0:
            return df
        empty_inner = False
        if join == "inner":
            # don't filter out empty df's
            if any(obj.empty for obj in old_objs):
                empty_inner = True

        objs, match_index = _align_objs(objs, how=join)

        for idx, o in enumerate(objs):
            if idx == 0:
                df.index = o.index
            for col in o._data.names:
                if col in df._data:
                    raise NotImplementedError(
                        f"A Column with duplicate name found: {col}, cuDF "
                        f"doesn't support having multiple columns with "
                        f"same names yet."
                    )
                df[col] = o._data[col]

        result_columns = objs[0].columns
        for o in objs[1:]:
            result_columns = result_columns.append(o.columns)
        if ignore_index:
            # with ignore_index the column names change to numbers
            df.columns = pd.RangeIndex(len(result_columns.unique()))
        else:
            df.columns = result_columns.unique()
        if empty_inner:
            # if join is inner and it contains an empty df
            # we return an empty df
            return df.head(0)
        if not match_index and sort is not False:
            return df.sort_index()
        if sort or join == "inner":
            # when join='outer' and sort=False string indexes
            # are returned unsorted. Everything else seems
            # to be returned sorted when axis = 1
            return df.sort_index()
        else:
            return df

    typ = list(typs)[0]

    if len(typs) > 1:
        if allowed_typs == typs:
            # This block of code will run when `objs` has
            # both Series & DataFrame kind of inputs.
            _normalize_series_and_dataframe(objs, axis=axis)
            typ = cudf.DataFrame
        else:
            raise TypeError(
                f"`concat` cannot concatenate objects of "
                f"types: {sorted([t.__name__ for t in typs])}."
            )

    if typ is cudf.DataFrame:
        old_objs = objs
        objs = [obj for obj in objs if obj.shape != (0, 0)]
        if len(objs) == 0:
            # If objs is empty, that indicates all of
            # objs are empty dataframes.
            return cudf.DataFrame()
        elif len(objs) == 1:
            if join == "inner":
                data = None
            else:
                data = objs[0]._data.copy(deep=True)
            result = cudf.DataFrame(
                data=data,
                index=cudf.RangeIndex(len(objs[0]))
                if ignore_index
                else objs[0].index.copy(deep=True),
            )
            return result
        else:
            if join == "inner" and len(old_objs) != len(objs):
                # don't filter out empty df's
                objs = old_objs
            result = cudf.DataFrame._concat(
                objs,
                axis=axis,
                join=join,
                ignore_index=ignore_index,
                sort=sort,
            )
        return result

    elif typ is cudf.Series:
        objs = [obj for obj in objs if len(obj)]
        if len(objs) == 0:
            return cudf.Series()
        elif len(objs) == 1 and not ignore_index:
            return objs[0]
        else:
            return cudf.Series._concat(
                objs, axis=axis, index=None if ignore_index else True
            )
    elif typ is cudf.MultiIndex:
        return cudf.MultiIndex._concat(objs)
    elif issubclass(typ, cudf.Index):
        return cudf.Index._concat(objs)
    else:
        raise TypeError(f"cannot concatenate object of type {typ}")
示例#6
0
def concat(objs, axis=0, join="outer", ignore_index=False, sort=None):
    """Concatenate DataFrames, Series, or Indices row-wise.

    Parameters
    ----------
    objs : list of DataFrame, Series, or Index
    axis : {0/'index', 1/'columns'}, default 0
        The axis to concatenate along.
    join : {'inner', 'outer'}, default 'outer'
        How to handle indexes on other axis (or axes).
    ignore_index : bool, default False
        Set True to ignore the index of the *objs* and provide a
        default range index instead.
    sort : bool, default False
        Sort non-concatenation axis if it is not already aligned.

    Returns
    -------
    A new object of like type with rows from each object in ``objs``.

    Examples
    --------
    Combine two ``Series``.

    >>> import cudf
    >>> s1 = cudf.Series(['a', 'b'])
    >>> s2 = cudf.Series(['c', 'd'])
    >>> s1
    0    a
    1    b
    dtype: object
    >>> s2
    0    c
    1    d
    dtype: object
    >>> cudf.concat([s1, s2])
    0    a
    1    b
    0    c
    1    d
    dtype: object

    Clear the existing index and reset it in the
    result by setting the ``ignore_index`` option to ``True``.

    >>> cudf.concat([s1, s2], ignore_index=True)
    0    a
    1    b
    2    c
    3    d
    dtype: object

    Combine two DataFrame objects with identical columns.

    >>> df1 = cudf.DataFrame([['a', 1], ['b', 2]],
    ...                    columns=['letter', 'number'])
    >>> df1
      letter  number
    0      a       1
    1      b       2
    >>> df2 = cudf.DataFrame([['c', 3], ['d', 4]],
    ...                    columns=['letter', 'number'])
    >>> df2
      letter  number
    0      c       3
    1      d       4
    >>> cudf.concat([df1, df2])
      letter  number
    0      a       1
    1      b       2
    0      c       3
    1      d       4

    Combine DataFrame objects with overlapping columns and return
    everything. Columns outside the intersection will
    be filled with ``null`` values.

    >>> df3 = cudf.DataFrame([['c', 3, 'cat'], ['d', 4, 'dog']],
    ...                    columns=['letter', 'number', 'animal'])
    >>> df3
      letter  number animal
    0      c       3    cat
    1      d       4    dog
    >>> cudf.concat([df1, df3], sort=False)
      letter  number animal
    0      a       1   <NA>
    1      b       2   <NA>
    0      c       3    cat
    1      d       4    dog

    Combine ``DataFrame`` objects with overlapping columns
    and return only those that are shared by passing ``inner`` to
    the ``join`` keyword argument.

    >>> cudf.concat([df1, df3], join="inner")
      letter  number
    0      a       1
    1      b       2
    0      c       3
    1      d       4

    Combine ``DataFrame`` objects horizontally along the
    x axis by passing in ``axis=1``.

    >>> df4 = cudf.DataFrame([['bird', 'polly'], ['monkey', 'george']],
    ...                    columns=['animal', 'name'])
    >>> df4
       animal    name
    0    bird   polly
    1  monkey  george
    >>> cudf.concat([df1, df4], axis=1)
      letter  number  animal    name
    0      a       1    bird   polly
    1      b       2  monkey  george
    """

    # TODO: Do we really need to have different error messages for an empty
    # list and a list of None?
    if not objs:
        raise ValueError("No objects to concatenate")

    objs = [obj for obj in objs if obj is not None]

    if not objs:
        raise ValueError("All objects passed were None")

    # Normalize `axis` to 0/1 BEFORE the single-object fast path below:
    # that path compares `axis == 1` / `axis == 0` directly, so a string
    # axis ("index"/"columns") would otherwise match neither branch and
    # leave `result` unbound.  The message reports the caller's original
    # value (previously it printed the mapped value, always None).
    param_axis = _AXIS_MAP.get(axis, None)
    if param_axis is None:
        raise ValueError(
            f'`axis` must be 0 / "index" or 1 / "columns", got: {axis}')
    axis = param_axis

    # Return for single object
    if len(objs) == 1:
        obj = objs[0]

        if ignore_index:
            if axis == 1:
                result = cudf.DataFrame(
                    data=obj._data.copy(deep=True),
                    index=obj.index.copy(deep=True),
                )
                # The DataFrame constructor for dict-like data (such as the
                # ColumnAccessor given by obj._data here) will drop any columns
                # in the data that are not in `columns`, so we have to rename
                # after construction.
                result.columns = pd.RangeIndex(len(obj._data.names))
            elif axis == 0:
                if isinstance(obj, (pd.Series, cudf.Series)):
                    result = cudf.Series(
                        data=obj._data.copy(deep=True),
                        index=cudf.RangeIndex(len(obj)),
                    )
                else:
                    result = cudf.DataFrame(
                        data=obj._data.copy(deep=True),
                        index=cudf.RangeIndex(len(obj)),
                    )
        else:
            result = obj.copy()

        return result.sort_index(axis=axis) if sort else result

    # Retrieve the base types of `objs`. In order to support sub-types
    # and object wrappers, we use `isinstance()` instead of comparing
    # types directly
    typs = set()
    for o in objs:
        if isinstance(o, cudf.MultiIndex):
            typs.add(cudf.MultiIndex)
        elif isinstance(o, cudf.BaseIndex):
            typs.add(type(o))
        elif isinstance(o, cudf.DataFrame):
            typs.add(cudf.DataFrame)
        elif isinstance(o, cudf.Series):
            typs.add(cudf.Series)
        else:
            raise TypeError(f"cannot concatenate object of type {type(o)}")

    allowed_typs = {cudf.Series, cudf.DataFrame}

    # when axis is 1 (column) we can concat with Series and Dataframes
    if axis == 1:
        if not typs.issubset(allowed_typs):
            raise TypeError(
                "Can only concatenate Series and DataFrame objects when axis=1"
            )
        df = cudf.DataFrame()
        _normalize_series_and_dataframe(objs, axis=axis)

        # Inner joins involving empty data frames always return empty dfs, but
        # We must delay returning until we have set the column names.
        empty_inner = any(obj.empty for obj in objs) and join == "inner"

        objs = [obj for obj in objs if obj.shape != (0, 0)]

        if len(objs) == 0:
            return df

        objs, match_index = _align_objs(objs, how=join)

        df.index = objs[0].index
        for o in objs:
            for name, col in o._data.items():
                if name in df._data:
                    raise NotImplementedError(
                        f"A Column with duplicate name found: {name}, cuDF "
                        f"doesn't support having multiple columns with "
                        f"same names yet.")
                df[name] = col

        result_columns = objs[0].columns.append(
            [obj.columns for obj in objs[1:]])

        if ignore_index:
            # with ignore_index the column names change to numbers
            df.columns = pd.RangeIndex(len(result_columns.unique()))
        else:
            df.columns = result_columns.unique()

        if empty_inner:
            # if join is inner and it contains an empty df
            # we return an empty df
            return df.head(0)

        # This check uses `sort is not False` rather than just `sort=True`
        # to differentiate between a user-provided `False` value and the
        # default `None`. This is necessary for pandas compatibility, even
        # though `True` and `False` are the only valid options from the user.
        if not match_index and sort is not False:
            return df.sort_index()

        if sort or join == "inner":
            # when join='outer' and sort=False string indexes
            # are returned unsorted. Everything else seems
            # to be returned sorted when axis = 1
            return df.sort_index()
        else:
            return df

    # If we get here, we are always concatenating along axis 0 (the rows).
    typ = list(typs)[0]
    if len(typs) > 1:
        if allowed_typs == typs:
            # This block of code will run when `objs` has
            # both Series & DataFrame kind of inputs.
            _normalize_series_and_dataframe(objs, axis=axis)
            typ = cudf.DataFrame
        else:
            raise TypeError(f"`concat` cannot concatenate objects of "
                            f"types: {sorted([t.__name__ for t in typs])}.")

    if typ is cudf.DataFrame:
        old_objs = objs
        objs = [obj for obj in objs if obj.shape != (0, 0)]
        if len(objs) == 0:
            # If objs is empty, that indicates all of
            # objs are empty dataframes.
            return cudf.DataFrame()
        elif len(objs) == 1:
            obj = objs[0]
            result = cudf.DataFrame(
                data=None if join == "inner" else obj._data.copy(deep=True),
                index=cudf.RangeIndex(len(obj))
                if ignore_index else obj.index.copy(deep=True),
            )
            return result
        else:
            if join == "inner" and len(old_objs) != len(objs):
                # don't filter out empty df's
                objs = old_objs
            result = cudf.DataFrame._concat(
                objs,
                axis=axis,
                join=join,
                ignore_index=ignore_index,
                # Explicitly cast rather than relying on None being falsy.
                sort=bool(sort),
            )
        return result

    elif typ is cudf.Series:
        objs = [obj for obj in objs if len(obj)]
        if len(objs) == 0:
            return cudf.Series()
        elif len(objs) == 1 and not ignore_index:
            return objs[0]
        else:
            return cudf.Series._concat(objs,
                                       axis=axis,
                                       index=None if ignore_index else True)
    elif typ is cudf.MultiIndex:
        return cudf.MultiIndex._concat(objs)
    elif issubclass(typ, cudf.Index):
        return cudf.Index._concat(objs)
    else:
        raise TypeError(f"cannot concatenate object of type {typ}")
示例#7
0
def test_index_rangeindex_get_item_slices(rge, sl):
    # Slicing a cudf RangeIndex must match pandas exactly.
    expect = pd.RangeIndex(*rge)[sl]
    got = cudf.RangeIndex(*rge)[sl]
    assert_eq(expect, got)
示例#8
0
def test_index_rangeindex_get_item_null_range(rge):
    idx = cudf.RangeIndex(*rge)

    # Any positional access into an empty range must fail.
    with pytest.raises(IndexError):
        idx[0]
示例#9
0
def test_index_rangeindex_get_item_out_of_bounds(rge):
    idx = cudf.RangeIndex(*rge)
    # Position 4 lies past the end of the range.
    with pytest.raises(IndexError):
        _ = idx[4]
示例#10
0
def test_index_rangeindex_get_item_basic(rge):
    host = pd.RangeIndex(*rge)
    device = cudf.RangeIndex(*rge)

    # Cover every valid position, negative offsets included.
    n = len(host)
    for pos in range(-n, n):
        assert host[pos] == device[pos]
示例#11
0
文件: reshape.py 项目: igormp/cudf
def concat(objs, axis=0, ignore_index=False, sort=None):
    """Concatenate DataFrames, Series, or Indices row-wise.

    Parameters
    ----------
    objs : list of DataFrame, Series, or Index
    axis : {0/'index', 1/'columns'}, default 0
        The axis to concatenate along.
    ignore_index : bool, default False
        Set True to ignore the index of the *objs* and provide a
        default range index instead.
    sort : bool, default False
        Sort non-concatenation axis if it is not already aligned.

    Returns
    -------
    A new object of like type with rows from each object in ``objs``.

    Examples
    --------
    Combine two ``Series``.

    >>> import cudf
    >>> s1 = cudf.Series(['a', 'b'])
    >>> s2 = cudf.Series(['c', 'd'])
    >>> s1
    0    a
    1    b
    dtype: object
    >>> s2
    0    c
    1    d
    dtype: object
    >>> cudf.concat([s1, s2])
    0    a
    1    b
    0    c
    1    d
    dtype: object

    Clear the existing index and reset it in the
    result by setting the ``ignore_index`` option to ``True``.

    >>> cudf.concat([s1, s2], ignore_index=True)
    0    a
    1    b
    2    c
    3    d
    dtype: object

    Combine two DataFrame objects with identical columns.

    >>> df1 = cudf.DataFrame([['a', 1], ['b', 2]],
    ...                    columns=['letter', 'number'])
    >>> df1
      letter  number
    0      a       1
    1      b       2
    >>> df2 = cudf.DataFrame([['c', 3], ['d', 4]],
    ...                    columns=['letter', 'number'])
    >>> df2
      letter  number
    0      c       3
    1      d       4
    >>> cudf.concat([df1, df2])
      letter  number
    0      a       1
    1      b       2
    0      c       3
    1      d       4

    Combine DataFrame objects with overlapping columns and return
    everything. Columns outside the intersection will
    be filled with ``null`` values.

    >>> df3 = cudf.DataFrame([['c', 3, 'cat'], ['d', 4, 'dog']],
    ...                    columns=['letter', 'number', 'animal'])
    >>> df3
    letter  number animal
    0      c       3    cat
    1      d       4    dog
    >>> cudf.concat([df1, df3], sort=False)
      letter  number animal
    0      a       1   None
    1      b       2   None
    0      c       3    cat
    1      d       4    dog

    Combine ``DataFrame`` objects horizontally along the
    x axis by passing in ``axis=1``.

    >>> df4 = cudf.DataFrame([['bird', 'polly'], ['monkey', 'george']],
    ...                    columns=['animal', 'name'])
    >>> df4
       animal    name
    0    bird   polly
    1  monkey  george
    >>> cudf.concat([df1, df4], axis=1)
      letter  number  animal    name
    0      a       1    bird   polly
    1      b       2  monkey  george
    """

    if not objs:
        raise ValueError("No objects to concatenate")

    objs = [obj for obj in objs if obj is not None]

    # Return for single object
    if len(objs) == 1:
        if ignore_index:
            # NOTE(review): this always builds a DataFrame, even when the
            # single input is a Series — confirm whether callers rely on it.
            result = cudf.DataFrame(
                data=objs[0]._data.copy(deep=True),
                index=cudf.RangeIndex(len(objs[0])),
            )
        else:
            result = objs[0].copy()
        return result

    if len(objs) == 0:
        raise ValueError("All objects passed were None")

    # Retrieve the base types of `objs`. In order to support sub-types
    # and object wrappers, we use `isinstance()` instead of comparing
    # types directly
    typs = set()
    for o in objs:
        if isinstance(o, cudf.MultiIndex):
            typs.add(cudf.MultiIndex)
        # `elif` (not `if`): a MultiIndex is also a cudf.Index, and
        # adding both types would wrongly trip the mixed-types error
        # further down.
        elif issubclass(type(o), cudf.Index):
            typs.add(type(o))
        elif isinstance(o, cudf.DataFrame):
            typs.add(cudf.DataFrame)
        elif isinstance(o, cudf.Series):
            typs.add(cudf.Series)
        else:
            raise TypeError(f"cannot concatenate object of type {type(o)}")

    allowed_typs = {cudf.Series, cudf.DataFrame}

    # Report the caller's original axis value on error (previously the
    # message printed the mapped value, which is always None here).
    param_axis = _axis_map.get(axis, None)
    if param_axis is None:
        raise ValueError(
            f'`axis` must be 0 / "index" or 1 / "columns", got: {axis}')
    else:
        axis = param_axis

    # when axis is 1 (column) we can concat with Series and Dataframes
    if axis == 1:

        # Raise instead of `assert`: asserts are stripped under `-O`,
        # and this is input validation, not an internal invariant.
        if not typs.issubset(allowed_typs):
            raise TypeError(
                "Can only concatenate Series and DataFrame objects when axis=1"
            )
        df = cudf.DataFrame()
        _normalize_series_and_dataframe(objs, axis=axis)

        objs, match_index = _align_objs(objs)

        for idx, o in enumerate(objs):
            if not ignore_index and idx == 0:
                df.index = o.index
            for col in o._data.names:
                if col in df._data:
                    raise NotImplementedError(
                        f"A Column with duplicate name found: {col}, cuDF "
                        f"doesn't support having multiple columns with "
                        f"same names yet.")
                df[col] = o._data[col]

        result_columns = objs[0].columns
        for o in objs[1:]:
            result_columns = result_columns.append(o.columns)

        df.columns = result_columns.unique()
        if ignore_index:
            df.index = cudf.RangeIndex(len(objs[0]))
            return df
        elif not match_index:
            return df.sort_index()
        else:
            return df

    typ = list(typs)[0]

    if len(typs) > 1:
        if allowed_typs == typs:
            # This block of code will run when `objs` has
            # both Series & DataFrame kind of inputs.
            _normalize_series_and_dataframe(objs, axis=axis)
            typ = cudf.DataFrame
        else:
            raise TypeError(f"`concat` cannot concatenate objects of "
                            f"types: {sorted([t.__name__ for t in typs])}.")

    if typ is cudf.DataFrame:
        objs = [obj for obj in objs if obj.shape != (0, 0)]
        if len(objs) == 0:
            # If objs is empty, that indicates all of
            # objs are empty dataframes.
            return cudf.DataFrame()
        elif len(objs) == 1:
            if ignore_index:
                result = cudf.DataFrame(
                    data=objs[0]._data.copy(deep=True),
                    index=cudf.RangeIndex(len(objs[0])),
                )
            else:
                result = objs[0].copy()
            return result
        else:
            return cudf.DataFrame._concat(objs,
                                          axis=axis,
                                          ignore_index=ignore_index,
                                          sort=sort)
    elif typ is cudf.Series:
        objs = [obj for obj in objs if len(obj)]
        if len(objs) == 0:
            return cudf.Series()
        elif len(objs) == 1 and not ignore_index:
            return objs[0]
        else:
            return cudf.Series._concat(objs,
                                       axis=axis,
                                       index=None if ignore_index else True)
    elif typ is cudf.MultiIndex:
        return cudf.MultiIndex._concat(objs)
    elif issubclass(typ, cudf.Index):
        return cudf.Index._concat(objs)
    else:
        raise TypeError(f"cannot concatenate object of type {typ}")