def column_empty_like(column, dtype, masked):
    """Allocate a new column with the same length as *column*.

    Parameters
    ----------
    column : sized object
        Only ``len(column)`` is used, to size the new data buffer.
    dtype : np.dtype like
        The dtype of the newly allocated data buffer.
    masked : bool
        If true, a mask sized from the data is allocated as well and the
        new column's ``null_count`` is set to the number of elements.

    Returns
    -------
    Column
        A column wrapping the freshly allocated buffer(s).
    """
    device_data = rmm.device_array(shape=len(column), dtype=dtype)
    data_buf = Buffer(device_data)
    if not masked:
        return Column(data=data_buf)

    # Masked variant: every element is reported as null.
    null_mask = utils.make_mask(device_data.size)
    return Column(
        data=data_buf,
        mask=Buffer(null_mask),
        null_count=device_data.size,
    )
def from_cffi_view(cffi_view):
    """Build a Column from a cffi ``gdf_column*`` struct.

    The data and mask memory are obtained from
    ``_gdf.cffi_view_to_column_mem`` and each is wrapped in a ``Buffer``.
    When no mask memory is returned (``None``), the resulting column is
    constructed with ``mask=None``.

    Parameters
    ----------
    cffi_view : cffi struct gdf_column*
        The view to convert.

    Returns
    -------
    Column
    """
    data_mem, mask_mem = _gdf.cffi_view_to_column_mem(cffi_view)
    data_buf = Buffer(data_mem)
    mask_buf = None if mask_mem is None else Buffer(mask_mem)
    return Column(data=data_buf, mask=mask_buf)
def _group_inner_levels(self, columns, rowidcol, segs, markers):
    """Group the second and onwards level.

    Each key column in *columns* is processed in turn: it is gathered
    into the current row order, segment-sorted within the current
    segments, and then used to refine the segments and the row order for
    the next key.

    Parameters
    ----------
    columns : sequence[str]
        Group keys, looked up in ``self._df``.  The order is important.
    rowidcol : Series-like
        Column holding the original row ids; it must provide ``take`` and
        ``to_gpu_array``.  It is used internally to determine the
        shuffling order.
    segs : Series
        First level group begin offsets (cast to int32 here).
    markers
        Forwarded to ``cudautils.find_segments`` and replaced by the
        value it returns on every iteration.
        NOTE(review): exact semantics are defined by ``find_segments``;
        not visible from this function.

    Returns
    -------
    (sorted_keys, reordering_indices, segments)
        - sorted_keys : list[Series]
            List of sorted key columns.
            Column order is same as arg *columns*.
        - reordering_indices : device array
            The indices to gather on to shuffle the dataframe
            into the grouped sequence.
        - segments : Series
            Group begin offsets.
    """
    seg_offsets = segs.astype(dtype=np.int32).data.mem
    plans = {}
    grouped_keys = []
    for key_name in columns:
        # Bring the key column into the order produced by earlier levels.
        keys = self._df[key_name].take(rowidcol.to_gpu_array(),
                                       ignore_index=True)

        # Segmented sort of the keys, carrying an index column along so
        # the resulting permutation can be applied to the row ids below.
        # NOTE(review): apply_segsort is expected to sort ``keys`` and
        # ``perm`` in place; its return value is kept as a reusable plan.
        perm = Column(Buffer(cudautils.arange(len(keys))))
        signature = (len(keys), keys.dtype, perm.dtype)
        cached_plan = plans.get(signature)
        plans[signature] = apply_segsort(keys._column, perm, seg_offsets,
                                         plan=cached_plan)

        grouped_keys.append(keys)

        # Refine the segments using the newly sorted keys.
        seg_offsets, markers = cudautils.find_segments(
            keys.to_gpu_array(), seg_offsets, markers=markers)

        # Apply this level's permutation to the row ids.
        rowidcol = rowidcol.take(perm.to_gpu_array(), ignore_index=True)

    return grouped_keys, rowidcol.to_gpu_array(), Series(seg_offsets)
def column_empty_like_same_mask(column, dtype):
    """Create a new empty Column with the same length and the same mask.

    Parameters
    ----------
    column : Column
        Source column.  Its length sizes the new data buffer, and if
        ``column.has_null_mask`` is true its ``nullmask`` is passed on to
        the new column as-is.
    dtype : np.dtype like
        The dtype of the data buffer.

    Returns
    -------
    Column
        A column with a newly allocated data buffer and, when the source
        has one, the source's mask.
    """
    data_buf = Buffer(rmm.device_array(shape=len(column), dtype=dtype))
    if not column.has_null_mask:
        return Column(data=data_buf)
    return Column(data=data_buf, mask=column.nullmask)