def column_empty(row_count, dtype="object", masked=False): """Allocate a new column like the given row_count and dtype. """ dtype = pd.api.types.pandas_dtype(dtype) children = () if is_categorical_dtype(dtype): data = None children = (build_column( data=Buffer.empty(row_count * np.dtype("int32").itemsize), dtype="int32", ), ) elif dtype.kind in "OU": data = None children = ( build_column( data=Buffer.empty( (row_count + 1) * np.dtype("int32").itemsize), dtype="int32", ), build_column( data=Buffer.empty(row_count * np.dtype("int8").itemsize), dtype="int8", ), ) else: data = Buffer.empty(row_count * dtype.itemsize) if masked: mask = Buffer(cudautils.make_empty_mask(row_count)) else: mask = None return build_column(data, dtype, mask=mask, children=children)
def column_empty(row_count, dtype="object", masked=False): """Allocate a new column like the given row_count and dtype. """ dtype = pd.api.types.pandas_dtype(dtype) children = () if is_categorical_dtype(dtype): data = None children = (build_column( data=Buffer.empty(row_count * np.dtype("int32").itemsize), dtype="int32", ), ) elif dtype.kind in "OU": data = None children = ( build_column( data=Buffer(cupy.zeros(row_count + 1, dtype="int32")), dtype="int32", ), build_column( data=Buffer.empty(row_count * np.dtype("int8").itemsize), dtype="int8", ), ) else: data = Buffer.empty(row_count * dtype.itemsize) if masked: mask = create_null_mask(row_count, state=MaskState.ALL_NULL) else: mask = None return build_column(data, dtype, mask=mask, children=children)
def __init__(self, dtype, mask=None, offset=0, children=()):
    """
    Parameters
    ----------
    dtype : CategoricalDtype
    mask : Buffer
        The validity mask
    offset : int
        Data offset
    children : Tuple[Column]
        A single non-null column containing the categorical codes
        (the categories are carried by ``dtype``)
    """
    data = Buffer.empty(0)
    size = children[0].size
    if isinstance(dtype, pd.api.types.CategoricalDtype):
        dtype = CategoricalDtype.from_pandas(dtype)
    if not isinstance(dtype, CategoricalDtype):
        raise ValueError("dtype must be instance of CategoricalDtype")
    super().__init__(
        data,
        size=size,
        dtype=dtype,
        mask=mask,
        offset=offset,
        children=children,
    )
    self._codes = None
def buffers_from_pyarrow(pa_arr, dtype=None): """ Given a pyarrow array returns a 5 length tuple of: - size - offset - cudf.Buffer --> mask - cudf.Buffer --> data - cudf.Buffer --> string characters """ from cudf._libxx.null_mask import bitmask_allocation_size_bytes buffers = pa_arr.buffers() if pa_arr.null_count: mask_size = bitmask_allocation_size_bytes(len(pa_arr)) pamask = pyarrow_buffer_to_cudf_buffer(buffers[0], mask_size=mask_size) else: pamask = None offset = pa_arr.offset size = len(pa_arr) if buffers[1]: padata = pyarrow_buffer_to_cudf_buffer(buffers[1]) else: padata = Buffer.empty(0) pastrs = None if isinstance(pa_arr, pa.StringArray): pastrs = pyarrow_buffer_to_cudf_buffer(buffers[2]) return (size, offset, pamask, padata, pastrs)
def __init__(self, mask=None, offset=0, children=()):
    """
    Parameters
    ----------
    mask : Buffer
        The validity mask
    offset : int
        Data offset
    children : Tuple[Column]
        Two non-null columns containing the offsets and string data
        respectively
    """
    data = Buffer.empty(0)
    dtype = np.dtype("object")

    if children[0].size == 0:
        size = 0
    else:
        # one less because the last element of offsets is the number of
        # bytes in the data buffer
        size = children[0].size - 1
    super().__init__(data, size, dtype, mask=mask, children=children)

    self._nvstrings = None
    self._nvcategory = None
    self._indices = None
def buffers_from_pyarrow(pa_arr, dtype=None): """ Given a pyarrow array returns a 5 length tuple of: - size - offset - cudf.Buffer --> mask - cudf.Buffer --> data - cudf.Buffer --> string characters """ from cudf.core.buffer import Buffer from cudf.utils.cudautils import copy_array buffers = pa_arr.buffers() if pa_arr.null_count: mask_dev_array = make_mask(len(pa_arr)) arrow_dev_array = rmm.to_device(np.asarray(buffers[0]).view("int8")) copy_array(arrow_dev_array, mask_dev_array) pamask = Buffer(mask_dev_array) else: pamask = None offset = pa_arr.offset size = len(pa_arr) if dtype: data_dtype = dtype elif isinstance(pa_arr, pa.StringArray): data_dtype = np.int32 else: if isinstance(pa_arr, pa.DictionaryArray): data_dtype = pa_arr.indices.type.to_pandas_dtype() else: data_dtype = pa_arr.type.to_pandas_dtype() if buffers[1]: padata = Buffer(np.asarray(buffers[1]).view(data_dtype)) else: padata = Buffer.empty(0) pastrs = None if isinstance(pa_arr, pa.StringArray): pastrs = Buffer(np.asarray(buffers[2]).view(np.int8)) return (size, offset, pamask, padata, pastrs)
def write(self, content):
    from cudf.core.buffer import Buffer
    from cupy.cuda import MemoryPointer
    from cupy.cuda.memory import UnownedMemory

    if not self._has_write_headers:
        # The first chunk written is the pickled header describing each
        # buffer's type and size; pre-allocate the destinations accordingly.
        self._headers = headers = pickle.loads(content)
        buffer_types = headers["buffer_types"]
        for buffer_type, size in buffer_types:
            if buffer_type == "cuda":
                self._buffers.append(Buffer.empty(size))
            else:
                self._buffers.append(BytesIO())
        self._has_write_headers = True
        return

    cur_buf = self._buffers[self._cur_buffer_index]
    cur_buf_size = self._headers["buffer_types"][self._cur_buffer_index][1]
    if isinstance(cur_buf, Buffer):
        # Copy the incoming bytes (host or device) into the device buffer
        # at the current write offset via cupy.
        cur_cupy_memory = UnownedMemory(cur_buf.ptr, len(cur_buf), cur_buf)
        cupy_pointer = MemoryPointer(cur_cupy_memory, self._offset)

        if isinstance(content, bytes):
            content_length = len(content)
            source_mem = np.frombuffer(
                content, dtype="uint8").ctypes.data_as(ctypes.c_void_p)
        else:
            source_mem = MemoryPointer(
                UnownedMemory(content.ptr, len(content), content), 0)
            content_length = source_mem.mem.size
        cupy_pointer.copy_from(source_mem, content_length)
    else:
        content_length = len(content)
        cur_buf.write(content)

    if content_length + self._offset >= cur_buf_size:
        # The current buffer is full; finalize it and move on to the next one.
        if isinstance(cur_buf, BytesIO):
            self._buffers[self._cur_buffer_index] = cur_buf.getvalue()
        self._cur_buffer_index += 1
        self._offset = 0
    else:
        self._offset += content_length
async def read_buffers(header: Dict, reader: StreamReader):
    if cupy is not None and cudf is not None:
        from cudf.core.buffer import Buffer as CPBuffer
        from cupy.cuda.memory import (
            UnownedMemory as CPUnownedMemory,
            MemoryPointer as CPMemoryPointer,
        )
    else:
        CPBuffer = CPUnownedMemory = CPMemoryPointer = None

    # construct an empty cuda buffer and copy from host
    is_cuda_buffers = header.get("is_cuda_buffers")
    buffer_sizes = header.pop(BUFFER_SIZES_NAME)

    buffers = []
    for is_cuda_buffer, buf_size in zip(is_cuda_buffers, buffer_sizes):
        if is_cuda_buffer:  # pragma: no cover
            if buf_size == 0:
                content = await reader.readexactly(buf_size)
                buffers.append(content)
            else:
                cuda_buffer = CPBuffer.empty(buf_size)
                cupy_memory = CPUnownedMemory(
                    cuda_buffer.ptr, buf_size, cuda_buffer)
                offset = 0
                chunk_size = CUDA_CHUNK_SIZE
                while offset < buf_size:
                    read_size = (
                        chunk_size
                        if (offset + chunk_size) < buf_size
                        else buf_size - offset
                    )
                    content = await reader.readexactly(read_size)
                    source_mem = np.frombuffer(
                        content, dtype="uint8").ctypes.data_as(ctypes.c_void_p)
                    cupy_pointer = CPMemoryPointer(cupy_memory, offset)
                    cupy_pointer.copy_from(source_mem, len(content))
                    offset += read_size
                buffers.append(cuda_buffer)
        else:
            buffers.append(await reader.readexactly(buf_size))
    return buffers
def read(self, size: int):
    # we read the cuda_header first and then read cuda buffers one by one,
    # so the return value's size is not exactly the specified size.
    from cudf.core.buffer import Buffer
    from cupy.cuda import MemoryPointer
    from cupy.cuda.memory import UnownedMemory

    if not self._has_read_headers:
        self._has_read_headers = True
        return pickle.dumps(self._headers)

    if len(self._buffers) == 0:
        return ""

    cur_buf = self._buffers[0]
    if cur_buf is None:
        # empty cuda buffer
        content = Buffer.empty(0)
        self._offset = 0
        self._buffers.pop(0)
        return content
    elif size >= cur_buf.size - self._offset:
        # current buf read to end
        if isinstance(cur_buf, UnownedMemory):
            cupy_pointer = MemoryPointer(cur_buf, self._offset)
            content = Buffer(
                cupy_pointer.ptr, size=cur_buf.size - self._offset)
        else:
            content = cur_buf[self._offset:self._offset + size]
        self._offset = 0
        self._buffers.pop(0)
        return content
    else:
        if isinstance(cur_buf, UnownedMemory):
            cupy_pointer = MemoryPointer(cur_buf, self._offset)
            self._offset += size
            return Buffer(cupy_pointer.ptr, size=size)
        else:
            # slice out the requested window before advancing the offset
            content = cur_buf[self._offset:self._offset + size]
            self._offset += size
            return content
def as_numerical_column(self, dtype, **kwargs):
    mem_dtype = np.dtype(dtype)
    str_dtype = mem_dtype
    out_dtype = mem_dtype

    if mem_dtype.type in (np.int8, np.int16):
        # parse into int32 and downcast to the requested width at the end
        mem_dtype = np.dtype(np.int32)
        str_dtype = mem_dtype
    elif mem_dtype.type is np.datetime64:
        kwargs.update(units=np.datetime_data(mem_dtype)[0])
        mem_dtype = np.dtype(np.int64)
        if "format" not in kwargs:
            if len(self.nvstrings) > 0:
                # infer on host from the first not-na element
                fmt = pd.core.tools.datetimes._guess_datetime_format(
                    self[self.notna()][0])
                kwargs.update(format=fmt)
            else:
                fmt = None

    out_arr = rmm.device_array(shape=len(self), dtype=mem_dtype)
    out_ptr = libcudf.cudf.get_ctype_ptr(out_arr)
    kwargs.update({"devptr": out_ptr})

    _str_to_numeric_typecast_functions[str_dtype](self.nvstrings, **kwargs)

    out_col = column.as_column(out_arr)
    if self.has_nulls:
        mask_size = utils.calc_chunk_size(
            len(self.nvstrings), utils.mask_bitsize)
        out_mask = Buffer.empty(mask_size)
        out_mask_ptr = out_mask.ptr
        self.nvstrings.set_null_bitmask(out_mask_ptr, bdevmem=True)
        out_col = out_col.set_mask(out_mask)

    return out_col.astype(out_dtype)
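
# Usage sketch (illustrative, not part of the original source): the public
# Series.astype path is what ultimately reaches as_numerical_column above;
# the _demo_* name is hypothetical.
def _demo_string_to_numeric():
    import cudf
    s = cudf.Series(["1", "2", None])
    out = s.astype("int32")
    assert out.dtype == np.dtype("int32")
    assert out.null_count == 1  # the null string row stays null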
def buffers_from_pyarrow(pa_arr, dtype=None):
    from cudf.core.buffer import Buffer
    from cudf.utils.cudautils import copy_array

    buffers = pa_arr.buffers()

    if pa_arr.null_count:
        mask_dev_array = make_mask(len(pa_arr))
        arrow_dev_array = rmm.to_device(np.asarray(buffers[0]).view("int8"))
        copy_array(arrow_dev_array, mask_dev_array)
        pamask = Buffer(mask_dev_array)
    else:
        pamask = None

    offset = pa_arr.offset
    size = pa_arr.offset + len(pa_arr)

    if dtype:
        data_dtype = dtype
    elif isinstance(pa_arr, pa.StringArray):
        data_dtype = np.int32
        size = size + 1  # extra element holds number of bytes
    else:
        if isinstance(pa_arr, pa.DictionaryArray):
            data_dtype = pa_arr.indices.type.to_pandas_dtype()
        else:
            data_dtype = pa_arr.type.to_pandas_dtype()

    if buffers[1]:
        padata = Buffer(
            np.asarray(buffers[1]).view(data_dtype)[offset:offset + size])
    else:
        padata = Buffer.empty(0)

    pastrs = None
    if isinstance(pa_arr, pa.StringArray):
        pastrs = Buffer(np.asarray(buffers[2]).view(np.int8))
    return (pamask, padata, pastrs)
def as_column(arbitrary, nan_as_null=True, dtype=None, length=None):
    """Create a Column from an arbitrary object

    Parameters
    ----------
    arbitrary : object
        Object to construct the Column from. See *Notes*.
    nan_as_null : bool, optional
        If True (default), treat NaN values in arbitrary as null.
    dtype : optional
        Optionally typecast the constructed Column to the given dtype.
    length : int, optional
        If `arbitrary` is a scalar, broadcast into a Column of the given
        length.

    Returns
    -------
    A Column of the appropriate type and size.

    Notes
    -----
    Currently supported inputs are:

    * ``Column``
    * ``Series``
    * ``Index``
    * Scalars (can be broadcasted to a specified `length`)
    * Objects exposing ``__cuda_array_interface__`` (e.g., numba device arrays)
    * Objects exposing ``__array_interface__`` (e.g., numpy arrays)
    * pyarrow array
    * pandas.Categorical objects
    """
    from cudf.core.column import numerical, categorical, datetime, string
    from cudf.core.series import Series
    from cudf.core.index import Index

    if isinstance(arbitrary, ColumnBase):
        if dtype is not None:
            return arbitrary.astype(dtype)
        else:
            return arbitrary

    elif isinstance(arbitrary, Series):
        data = arbitrary._column
        if dtype is not None:
            data = data.astype(dtype)

    elif isinstance(arbitrary, Index):
        data = arbitrary._values
        if dtype is not None:
            data = data.astype(dtype)

    elif isinstance(arbitrary, nvstrings.nvstrings):
        byte_count = arbitrary.byte_count()
        if byte_count > libcudfxx.MAX_STRING_COLUMN_BYTES:
            raise MemoryError(
                "Cannot construct string columns "
                "containing > {} bytes. "
                "Consider using dask_cudf to partition "
                "your data.".format(libcudfxx.MAX_STRING_COLUMN_BYTES_STR)
            )
        sbuf = Buffer.empty(arbitrary.byte_count())
        obuf = Buffer.empty(
            (arbitrary.size() + 1) * np.dtype("int32").itemsize
        )

        nbuf = None
        if arbitrary.null_count() > 0:
            mask_size = calc_chunk_size(arbitrary.size(), mask_bitsize)
            nbuf = Buffer.empty(mask_size)
            arbitrary.set_null_bitmask(nbuf.ptr, bdevmem=True)
        arbitrary.to_offsets(sbuf.ptr, obuf.ptr, None, bdevmem=True)
        children = (
            build_column(obuf, dtype="int32"),
            build_column(sbuf, dtype="int8"),
        )
        data = build_column(
            data=None, dtype="object", mask=nbuf, children=children
        )
        data._nvstrings = arbitrary

    elif isinstance(arbitrary, Buffer):
        if dtype is None:
            raise TypeError("dtype cannot be None if 'arbitrary' is a Buffer")
        data = build_column(arbitrary, dtype=dtype)

    elif cuda.devicearray.is_cuda_ndarray(arbitrary):
        data = as_column(Buffer(arbitrary), dtype=arbitrary.dtype)
        if (
            data.dtype in [np.float16, np.float32, np.float64]
            and arbitrary.size > 0
        ):
            if nan_as_null:
                mask = libcudf.unaryops.nans_to_nulls(data)
                data = data.set_mask(mask)

        elif data.dtype.kind == "M":
            null = column_empty_like(data, masked=True, newsize=1)
            col = libcudf.replace.replace(
                as_column(Buffer(arbitrary), dtype=arbitrary.dtype),
                as_column(
                    Buffer(np.array([np.datetime64("NaT")], dtype=data.dtype)),
                    dtype=arbitrary.dtype,
                ),
                null,
            )
            data = datetime.DatetimeColumn(
                data=Buffer(arbitrary), dtype=data.dtype, mask=col.mask
            )

    elif hasattr(arbitrary, "__cuda_array_interface__"):
        desc = arbitrary.__cuda_array_interface__
        data = _data_from_cuda_array_interface_desc(arbitrary)
        mask = _mask_from_cuda_array_interface_desc(arbitrary)
        dtype = np.dtype(desc["typestr"])
        col = build_column(data, dtype=dtype, mask=mask)
        return col

    elif isinstance(arbitrary, np.ndarray):
        # cuDF assumes values are always contiguous
        if not arbitrary.flags["C_CONTIGUOUS"]:
            arbitrary = np.ascontiguousarray(arbitrary)

        if dtype is not None:
            arbitrary = arbitrary.astype(dtype)

        if arbitrary.dtype.kind == "M":
"M": data = datetime.DatetimeColumn.from_numpy(arbitrary) elif arbitrary.dtype.kind in ("O", "U"): data = as_column(pa.Array.from_pandas(arbitrary)) else: data = as_column(rmm.to_device(arbitrary), nan_as_null=nan_as_null) elif isinstance(arbitrary, pa.Array): if isinstance(arbitrary, pa.StringArray): pa_size, pa_offset, nbuf, obuf, sbuf = buffers_from_pyarrow( arbitrary ) children = ( build_column(data=obuf, dtype="int32"), build_column(data=sbuf, dtype="int8"), ) data = string.StringColumn( mask=nbuf, children=children, size=pa_size, offset=pa_offset ) elif isinstance(arbitrary, pa.NullArray): new_dtype = pd.api.types.pandas_dtype(dtype) if (type(dtype) == str and dtype == "empty") or dtype is None: new_dtype = pd.api.types.pandas_dtype( arbitrary.type.to_pandas_dtype() ) if is_categorical_dtype(new_dtype): arbitrary = arbitrary.dictionary_encode() else: if nan_as_null: arbitrary = arbitrary.cast(np_to_pa_dtype(new_dtype)) else: # casting a null array doesn't make nans valid # so we create one with valid nans from scratch: if new_dtype == np.dtype("object"): arbitrary = utils.scalar_broadcast_to( None, (len(arbitrary),), dtype=new_dtype ) else: arbitrary = utils.scalar_broadcast_to( np.nan, (len(arbitrary),), dtype=new_dtype ) data = as_column(arbitrary, nan_as_null=nan_as_null) elif isinstance(arbitrary, pa.DictionaryArray): codes = as_column(arbitrary.indices) if isinstance(arbitrary.dictionary, pa.NullArray): categories = as_column([], dtype="object") else: categories = as_column(arbitrary.dictionary) dtype = CategoricalDtype( categories=categories, ordered=arbitrary.type.ordered ) data = categorical.CategoricalColumn( dtype=dtype, mask=codes.base_mask, children=(codes,), size=codes.size, offset=codes.offset, ) elif isinstance(arbitrary, pa.TimestampArray): dtype = np.dtype("M8[{}]".format(arbitrary.type.unit)) pa_size, pa_offset, pamask, padata, _ = buffers_from_pyarrow( arbitrary, dtype=dtype ) data = datetime.DatetimeColumn( data=padata, mask=pamask, dtype=dtype, size=pa_size, offset=pa_offset, ) elif isinstance(arbitrary, pa.Date64Array): raise NotImplementedError pa_size, pa_offset, pamask, padata, _ = buffers_from_pyarrow( arbitrary, dtype="M8[ms]" ) data = datetime.DatetimeColumn( data=padata, mask=pamask, dtype=np.dtype("M8[ms]"), size=pa_size, offset=pa_offset, ) elif isinstance(arbitrary, pa.Date32Array): # No equivalent np dtype and not yet supported warnings.warn( "Date32 values are not yet supported so this will " "be typecast to a Date64 value", UserWarning, ) data = as_column(arbitrary.cast(pa.int32())).astype("M8[ms]") elif isinstance(arbitrary, pa.BooleanArray): # Arrow uses 1 bit per value while we use int8 dtype = np.dtype(np.bool) # Needed because of bug in PyArrow # https://issues.apache.org/jira/browse/ARROW-4766 if len(arbitrary) > 0: arbitrary = arbitrary.cast(pa.int8()) else: arbitrary = pa.array([], type=pa.int8()) pa_size, pa_offset, pamask, padata, _ = buffers_from_pyarrow( arbitrary, dtype=dtype ) data = numerical.NumericalColumn( data=padata, mask=pamask, dtype=dtype, size=pa_size, offset=pa_offset, ) else: pa_size, pa_offset, pamask, padata, _ = buffers_from_pyarrow( arbitrary ) data = numerical.NumericalColumn( data=padata, dtype=np.dtype(arbitrary.type.to_pandas_dtype()), mask=pamask, size=pa_size, offset=pa_offset, ) elif isinstance(arbitrary, pa.ChunkedArray): gpu_cols = [ as_column(chunk, dtype=dtype) for chunk in arbitrary.chunks ] if dtype and dtype != "empty": new_dtype = dtype else: pa_type = arbitrary.type if pa.types.is_dictionary(pa_type): 
new_dtype = "category" else: new_dtype = np.dtype(pa_type.to_pandas_dtype()) data = ColumnBase._concat(gpu_cols, dtype=new_dtype) elif isinstance(arbitrary, (pd.Series, pd.Categorical)): if is_categorical_dtype(arbitrary): data = as_column(pa.array(arbitrary, from_pandas=True)) elif arbitrary.dtype == np.bool: # Bug in PyArrow or HDF that requires us to do this data = as_column(pa.array(np.asarray(arbitrary), from_pandas=True)) else: data = as_column(pa.array(arbitrary, from_pandas=nan_as_null)) elif isinstance(arbitrary, pd.Timestamp): # This will always treat NaTs as nulls since it's not technically a # discrete value like NaN data = as_column(pa.array(pd.Series([arbitrary]), from_pandas=True)) elif np.isscalar(arbitrary) and not isinstance(arbitrary, memoryview): length = length or 1 data = as_column( utils.scalar_broadcast_to(arbitrary, length, dtype=dtype) ) if not nan_as_null: data = data.fillna(np.nan) elif isinstance(arbitrary, memoryview): data = as_column( np.asarray(arbitrary), dtype=dtype, nan_as_null=nan_as_null ) else: try: data = as_column( memoryview(arbitrary), dtype=dtype, nan_as_null=nan_as_null ) except TypeError: pa_type = None np_type = None try: if dtype is not None: dtype = pd.api.types.pandas_dtype(dtype) if is_categorical_dtype(dtype): raise TypeError else: np_type = np.dtype(dtype).type if np_type == np.bool_: pa_type = pa.bool_() else: pa_type = np_to_pa_dtype(np.dtype(dtype)) data = as_column( pa.array(arbitrary, type=pa_type, from_pandas=nan_as_null), dtype=dtype, nan_as_null=nan_as_null, ) except (pa.ArrowInvalid, pa.ArrowTypeError, TypeError): if is_categorical_dtype(dtype): sr = pd.Series(arbitrary, dtype="category") data = as_column(sr, nan_as_null=nan_as_null) elif np_type == np.str_: sr = pd.Series(arbitrary, dtype="str") data = as_column(sr, nan_as_null=nan_as_null) else: data = as_column( np.asarray(arbitrary, dtype=np.dtype(dtype)), nan_as_null=nan_as_null, ) return data
def _concat(cls, objs, dtype=None):
    from cudf.core.series import Series
    from cudf.core.column import (
        StringColumn,
        CategoricalColumn,
        NumericalColumn,
    )

    if len(objs) == 0:
        dtype = pd.api.types.pandas_dtype(dtype)
        if is_categorical_dtype(dtype):
            dtype = CategoricalDtype()
        return column_empty(0, dtype=dtype, masked=True)

    # If all columns are `NumericalColumn` with different dtypes,
    # we cast them to a common dtype.
    # Note that we can always cast pure-null columns.
    not_null_cols = list(filter(lambda o: len(o) != o.null_count, objs))
    if len(not_null_cols) > 0 and (
        len(
            [
                o
                for o in not_null_cols
                if not isinstance(o, NumericalColumn)
                or np.issubdtype(o.dtype, np.datetime64)
            ]
        )
        == 0
    ):
        col_dtypes = [o.dtype for o in not_null_cols]
        # Use NumPy to find a common dtype
        common_dtype = np.find_common_type(col_dtypes, [])
        # Cast all columns to the common dtype
        for i in range(len(objs)):
            objs[i] = objs[i].astype(common_dtype)

    # Find the first non-null column:
    head = objs[0]
    for i, obj in enumerate(objs):
        if len(obj) != obj.null_count:
            head = obj
            break

    for i, obj in enumerate(objs):
        # Check that all columns are the same type:
        if not pd.api.types.is_dtype_equal(objs[i].dtype, head.dtype):
            # if all null, cast to appropriate dtype
            if len(obj) == obj.null_count:
                from cudf.core.column import column_empty_like

                objs[i] = column_empty_like(
                    head, dtype=head.dtype, masked=True, newsize=len(obj)
                )

    # Handle categories for categoricals
    if all(isinstance(o, CategoricalColumn) for o in objs):
        cats = (
            Series(ColumnBase._concat([o.categories for o in objs]))
            .drop_duplicates()
            ._column
        )
        objs = [
            o.cat()._set_categories(cats, is_unique=True) for o in objs
        ]

    head = objs[0]
    for obj in objs:
        if not (obj.dtype == head.dtype):
            raise ValueError("All series must be of same type")

    newsize = sum(map(len, objs))
    if newsize > libcudfxx.MAX_COLUMN_SIZE:
        raise MemoryError(
            "Result of concat cannot have "
            "size > {}".format(libcudfxx.MAX_COLUMN_SIZE_STR)
        )

    # Handle strings separately
    if all(isinstance(o, StringColumn) for o in objs):
        result_nbytes = sum(o._nbytes for o in objs)
        if result_nbytes > libcudfxx.MAX_STRING_COLUMN_BYTES:
            raise MemoryError(
                "Result of concat cannot have > {} bytes".format(
                    libcudfxx.MAX_STRING_COLUMN_BYTES_STR
                )
            )
        objs = [o.nvstrings for o in objs]
        return as_column(nvstrings.from_strings(*objs))

    # Filter out inputs that have 0 length
    objs = [o for o in objs if len(o) > 0]
    nulls = any(col.nullable for col in objs)

    if is_categorical_dtype(head):
        data_dtype = head.codes.dtype
        data = None
        children = (column_empty(newsize, dtype=head.codes.dtype),)
    else:
        data_dtype = head.dtype
        data = Buffer.empty(size=newsize * data_dtype.itemsize)
        children = ()

    # Allocate an output mask only if there are nulls in the input objects
    mask = None
    if nulls:
        mask = Buffer(utils.make_mask(newsize))

    col = build_column(
        data=data, dtype=head.dtype, mask=mask, children=children
    )

    # Perform the actual concatenation
    if newsize > 0:
        col = libcudf.concat._column_concat(objs, col)

    return col
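
# Usage sketch (illustrative, not part of the original source): Series and
# DataFrame concatenation lowers to this classmethod; mixed numeric dtypes are
# upcast to a common dtype before the columns are joined. The _demo_* name is
# hypothetical.
def _demo_column_concat():
    a = as_column(np.array([1, 2], dtype="int32"))
    b = as_column(np.array([3, 4], dtype="int64"))
    out = ColumnBase._concat([a, b])
    assert len(out) == 4
    assert out.dtype == np.dtype("int64")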