def buffers_from_pyarrow(pa_arr, dtype=None):
    """
    Break a pyarrow array into its length, offset and cudf buffers.

    Parameters
    ----------
    pa_arr : pyarrow array
        Array whose underlying buffers are converted.
    dtype : optional
        Accepted for signature compatibility; not used by this function.

    Returns
    -------
    tuple
        A 5 length tuple ``(size, offset, mask, data, strings)``:

        - size    : ``len(pa_arr)``
        - offset  : ``pa_arr.offset``
        - mask    : cudf.Buffer built from buffer 0, or ``None`` when
          ``pa_arr.null_count`` is zero
        - data    : cudf.Buffer built from buffer 1, or an empty Buffer
          when that buffer is falsy
        - strings : cudf.Buffer of string characters built from buffer 2
          when ``pa_arr`` is a ``pa.StringArray``, otherwise ``None``
    """
    from cudf._lib.null_mask import bitmask_allocation_size_bytes

    arrow_bufs = pa_arr.buffers()

    # A validity mask is only converted when the array reports nulls.
    null_mask = None
    if pa_arr.null_count:
        mask_nbytes = bitmask_allocation_size_bytes(len(pa_arr))
        null_mask = pyarrow_buffer_to_cudf_buffer(
            arrow_bufs[0], mask_size=mask_nbytes
        )

    arr_offset = pa_arr.offset
    arr_size = len(pa_arr)

    # Fall back to a zero-length Buffer when there is no data buffer.
    data_buf = (
        pyarrow_buffer_to_cudf_buffer(arrow_bufs[1])
        if arrow_bufs[1]
        else Buffer.empty(0)
    )

    # Only string arrays carry a third buffer holding the characters.
    chars_buf = (
        pyarrow_buffer_to_cudf_buffer(arrow_bufs[2])
        if isinstance(pa_arr, pa.StringArray)
        else None
    )

    return (arr_size, arr_offset, null_mask, data_buf, chars_buf)
def random_bitmask(size):
    """
    Generate a random bitmask large enough to hold ``size`` bits.

    Parameters
    ----------
    size : int
        number of bits

    Returns
    -------
    numpy.ndarray
        Array of ``bitmask_allocation_size_bytes(size)`` random bytes,
        reinterpreted as ``int8`` via ``view("i1")``.
    """
    sz = bitmask_allocation_size_bytes(size)
    # ``high`` is exclusive in ``np.random.randint``; 256 is required so
    # that the all-bits-set byte (255) can be drawn as well.
    data = np.random.randint(0, 256, dtype="u1", size=sz)
    return data.view("i1")