def concat(objs, axis=0, ignore_index=False, sort=None):
    """Concatenate DataFrames, Series, or Indices.

    Parameters
    ----------
    objs : list of DataFrame, Series, or Index
    axis : {0/'index', 1/'columns'}, default 0
        The axis to concatenate along.
    ignore_index : bool, default False
        Set True to ignore the index of the *objs* and provide a
        default range index instead. Only forwarded on the row-wise
        DataFrame path.
    sort : None or False
        Sorting is not supported yet; any other value raises
        ``NotImplementedError``.

    Returns
    -------
    A new object of like type with rows from each object in ``objs``.
    With ``axis=1`` a DataFrame holding the columns of every input is
    returned. A single-element ``objs`` is returned as-is (no copy).

    Raises
    ------
    NotImplementedError
        If ``sort`` is neither ``None`` nor ``False``.
    ValueError
        If ``objs`` is empty, ``axis`` is not recognised, the inputs are
        of mixed types (row-wise), or the input type is unsupported.
    """
    if sort not in (None, False):
        raise NotImplementedError("sort parameter is not yet supported")

    if not objs:
        raise ValueError("Need at least one object to concatenate")

    # no-op for single object
    if len(objs) == 1:
        return objs[0]

    typs = {type(o) for o in objs}
    allowed_typs = {Series, DataFrame}

    param_axis = _axis_map.get(axis, None)
    if param_axis is None:
        # Report the value the caller passed, not the failed lookup result
        # (which is always None here).
        raise ValueError(
            '`axis` must be 0 / "index" or 1 / "columns", got: {0}'.format(
                axis
            )
        )
    axis = param_axis

    # when axis is 1 (column) we can concat with Series and Dataframes
    if axis == 1:
        assert typs.issubset(allowed_typs)
        df = DataFrame()
        # Unnamed Series are labelled 0, 1, 2, ... in order of appearance,
        # counting only the unnamed ones (pandas behaviour).
        unnamed_count = 0
        for o in objs:
            if isinstance(o, Series):
                name = o.name
                if name is None:
                    name = unnamed_count
                    unnamed_count += 1
                df[name] = o
            else:
                for col in o.columns:
                    df[col] = o[col]
        return df

    if len(typs) > 1:
        raise ValueError(
            "`concat` expects all objects to be of the same "
            "type. Got mix of %r." % [t.__name__ for t in typs]
        )
    typ = next(iter(typs))

    if typ is DataFrame:
        return DataFrame._concat(objs, axis=axis, ignore_index=ignore_index)
    elif typ is Series:
        return Series._concat(objs, axis=axis)
    elif issubclass(typ, Index):
        return Index._concat(objs)
    else:
        raise ValueError("Unknown type %r" % typ)
def concat(objs, axis=0, ignore_index=False, sort=None):
    """Concatenate DataFrames, Series, or Indices.

    Parameters
    ----------
    objs : list of DataFrame, Series, or Index
        ``None`` entries are dropped before concatenation.
    axis : {0/'index', 1/'columns'}, default 0
        The axis to concatenate along.
    ignore_index : bool, default False
        Set True to ignore the index of the *objs* and provide a
        default range index instead. Only forwarded on the row-wise
        DataFrame path.
    sort : None or False
        Sorting is not supported yet; any other value raises
        ``NotImplementedError``.

    Returns
    -------
    A new object of like type with rows from each object in ``objs``.
    With ``axis=1`` a DataFrame holding the columns of every input is
    returned. If only one non-``None`` object remains it is returned
    as-is (no copy).

    Raises
    ------
    NotImplementedError
        If ``sort`` is neither ``None`` nor ``False``, or if two inputs
        share a column name when ``axis=1``.
    ValueError
        If ``objs`` is empty or contains only ``None``, ``axis`` is not
        recognised, the inputs are of mixed types (row-wise), or the
        input type is unsupported.
    """
    if sort not in (None, False):
        raise NotImplementedError("sort parameter is not yet supported")

    if not objs:
        raise ValueError("Need at least one object to concatenate")

    # Work on a private list so the caller's sequence is never mutated.
    objs = [obj for obj in objs if obj is not None]

    if not objs:
        raise ValueError("All objects passed were None")

    # Return for single object
    if len(objs) == 1:
        return objs[0]

    typs = {type(o) for o in objs}
    allowed_typs = {Series, DataFrame}

    param_axis = _axis_map.get(axis, None)
    if param_axis is None:
        # Report the value the caller passed, not the failed lookup result
        # (which is always None here).
        raise ValueError(
            '`axis` must be 0 / "index" or 1 / "columns", got: {0}'.format(
                axis
            )
        )
    axis = param_axis

    # when axis is 1 (column) we can concat with Series and Dataframes
    if axis == 1:
        assert typs.issubset(allowed_typs)
        df = DataFrame()

        # Promote every Series to a single-column frame; unnamed Series are
        # labelled 0, 1, 2, ... in order of appearance.
        sr_name = 0
        for idx, o in enumerate(objs):
            if isinstance(o, Series):
                name = o.name
                if name is None:
                    name = sr_name
                    sr_name += 1
                objs[idx] = o.to_frame(name=name)

        # The result takes the index of the first object.
        df.index = objs[0].index
        for o in objs:
            for col in o._data.names:
                if col in df._data:
                    raise NotImplementedError(
                        "A Column with duplicate name found: {0}, cuDF "
                        "doesn't support having multiple columns with "
                        "same names yet.".format(col)
                    )
                df[col] = o._data[col]

        # Rebuild the column index from the inputs' own column indexes.
        result_columns = objs[0].columns
        for o in objs[1:]:
            result_columns = result_columns.append(o.columns)

        df.columns = result_columns
        return df

    if len(typs) > 1:
        raise ValueError(
            "`concat` expects all objects to be of the same "
            "type. Got mix of %r." % [t.__name__ for t in typs]
        )
    typ = next(iter(typs))

    if typ is DataFrame:
        return DataFrame._concat(objs, axis=axis, ignore_index=ignore_index)
    elif typ is Series:
        return Series._concat(objs, axis=axis)
    elif typ is cudf.MultiIndex:
        return cudf.MultiIndex._concat(objs)
    elif issubclass(typ, Index):
        return Index._concat(objs)
    else:
        raise ValueError("Unknown type %r" % typ)
def _copy_single_obj(obj, ignore_index):
    """Return the result of "concatenating" the single object ``obj``."""
    if ignore_index:
        # NOTE(review): this always builds a DataFrame, even when ``obj`` is
        # a Series -- kept as in the original; confirm this is intended.
        return cudf.DataFrame(
            data=obj._data.copy(deep=True),
            index=cudf.RangeIndex(len(obj)),
        )
    return obj.copy()


def concat(objs, axis=0, ignore_index=False, sort=None):
    """Concatenate DataFrames, Series, or Indices.

    Parameters
    ----------
    objs : list of DataFrame, Series, or Index
        ``None`` entries are dropped before concatenation.
    axis : {0/'index', 1/'columns'}, default 0
        The axis to concatenate along.
    ignore_index : bool, default False
        Set True to ignore the index of the *objs* and provide a
        default range index instead.
    sort : bool, default None
        Sort non-concatenation axis if it is not already aligned.
        Only forwarded on the row-wise DataFrame path.

    Returns
    -------
    A new object of like type with rows from each object in ``objs``.

    Raises
    ------
    ValueError
        If ``objs`` is empty or contains only ``None``, an object has an
        unsupported type, ``axis`` is not recognised, or the objects are
        a mix of types other than Series and DataFrame.
    NotImplementedError
        If two inputs share a column name when ``axis=1``.

    Examples
    --------
    Combine two ``Series``.

    >>> import cudf
    >>> s1 = cudf.Series(['a', 'b'])
    >>> s2 = cudf.Series(['c', 'd'])
    >>> s1
    0    a
    1    b
    dtype: object
    >>> s2
    0    c
    1    d
    dtype: object
    >>> cudf.concat([s1, s2])
    0    a
    1    b
    0    c
    1    d
    dtype: object

    Clear the existing index and reset it in the
    result by setting the ``ignore_index`` option to ``True``.

    >>> cudf.concat([s1, s2], ignore_index=True)
    0    a
    1    b
    2    c
    3    d
    dtype: object

    Combine two DataFrame objects with identical columns.

    >>> df1 = cudf.DataFrame([['a', 1], ['b', 2]],
    ...                    columns=['letter', 'number'])
    >>> df1
      letter  number
    0      a       1
    1      b       2
    >>> df2 = cudf.DataFrame([['c', 3], ['d', 4]],
    ...                    columns=['letter', 'number'])
    >>> df2
      letter  number
    0      c       3
    1      d       4
    >>> cudf.concat([df1, df2])
      letter  number
    0      a       1
    1      b       2
    0      c       3
    1      d       4

    Combine DataFrame objects with overlapping columns and return
    everything. Columns outside the intersection will
    be filled with ``null`` values.

    >>> df3 = cudf.DataFrame([['c', 3, 'cat'], ['d', 4, 'dog']],
    ...                    columns=['letter', 'number', 'animal'])
    >>> df3
      letter  number animal
    0      c       3    cat
    1      d       4    dog
    >>> cudf.concat([df1, df3], sort=False)
      letter  number animal
    0      a       1   None
    1      b       2   None
    0      c       3    cat
    1      d       4    dog

    Combine ``DataFrame`` objects horizontally along the
    x axis by passing in ``axis=1``.

    >>> df4 = cudf.DataFrame([['bird', 'polly'], ['monkey', 'george']],
    ...                    columns=['animal', 'name'])
    >>> df4
       animal    name
    0    bird   polly
    1  monkey  george
    >>> cudf.concat([df1, df4], axis=1)
      letter  number  animal    name
    0      a       1    bird   polly
    1      b       2  monkey  george
    """
    if not objs:
        raise ValueError("No objects to concatenate")

    # Work on a private list: it is filtered here and may be normalized
    # in place further down.
    objs = [obj for obj in objs if obj is not None]

    if not objs:
        raise ValueError("All objects passed were None")

    # Return for single object
    if len(objs) == 1:
        return _copy_single_obj(objs[0], ignore_index)

    # Retrieve the base types of `objs`. In order to support sub-types
    # and object wrappers, we use `isinstance()` instead of comparing
    # types directly
    typs = set()
    for o in objs:
        # One object contributes exactly one type: MultiIndex is checked
        # first so that it is never also registered via the Index branch.
        if isinstance(o, cudf.MultiIndex):
            typs.add(cudf.MultiIndex)
        elif isinstance(o, Index):
            typs.add(type(o))
        elif isinstance(o, DataFrame):
            typs.add(DataFrame)
        elif isinstance(o, Series):
            typs.add(Series)
        else:
            raise ValueError(f"cannot concatenate object of type {type(o)}")

    allowed_typs = {Series, DataFrame}

    param_axis = _axis_map.get(axis, None)
    if param_axis is None:
        # Report the value the caller passed, not the failed lookup result
        # (which is always None here).
        raise ValueError(
            '`axis` must be 0 / "index" or 1 / "columns", got: {0}'.format(
                axis
            )
        )
    axis = param_axis

    # when axis is 1 (column) we can concat with Series and Dataframes
    if axis == 1:
        assert typs.issubset(allowed_typs)
        df = DataFrame()
        _normalize_series_and_dataframe(objs, axis=axis)

        objs, match_index = _align_objs(objs)

        # Unless the index is discarded, the result takes the index of the
        # first (aligned) object.
        if not ignore_index:
            df.index = objs[0].index

        for o in objs:
            for col in o._data.names:
                if col in df._data:
                    raise NotImplementedError(
                        "A Column with duplicate name found: {0}, cuDF "
                        "doesn't support having multiple columns with "
                        "same names yet.".format(col)
                    )
                df[col] = o._data[col]

        # Rebuild the column index from the inputs' own column indexes.
        result_columns = objs[0].columns
        for o in objs[1:]:
            result_columns = result_columns.append(o.columns)

        df.columns = result_columns.unique()
        if ignore_index:
            df.index = None
            return df
        if not match_index:
            return df.sort_index()
        return df

    typ = next(iter(typs))

    if len(typs) > 1:
        if allowed_typs == typs:
            # This block of code will run when `objs` has
            # both Series & DataFrame kind of inputs.
            _normalize_series_and_dataframe(objs, axis=axis)
            typ = DataFrame
        else:
            raise ValueError(
                "`concat` cannot concatenate objects of "
                "types: %r." % sorted([t.__name__ for t in typs])
            )

    if typ is DataFrame:
        objs = [obj for obj in objs if obj.shape != (0, 0)]
        if not objs:
            # If objs is empty, that indicates all of
            # objs are empty dataframes.
            return cudf.DataFrame()
        if len(objs) == 1:
            return _copy_single_obj(objs[0], ignore_index)
        return DataFrame._concat(
            objs, axis=axis, ignore_index=ignore_index, sort=sort
        )
    elif typ is Series:
        return Series._concat(
            objs, axis=axis, index=None if ignore_index else True
        )
    elif typ is cudf.MultiIndex:
        return cudf.MultiIndex._concat(objs)
    elif issubclass(typ, Index):
        return Index._concat(objs)
    else:
        raise ValueError(f"cannot concatenate object of type {typ}")