Exemplo n.º 1
0
def buffers_from_pyarrow(pa_arr, dtype=None):
    """
    Convert the buffers backing a pyarrow array into cudf buffers.

    Parameters
    ----------
    pa_arr : pyarrow array
        Array whose ``buffers()`` are converted.
    dtype : optional
        Accepted but not used by this function.

    Returns
    -------
    tuple
        A 5 length tuple of:
        - size: ``len(pa_arr)``
        - offset: ``pa_arr.offset``
        - mask: the first buffer converted with a ``mask_size`` derived
          from the array length, or None when ``pa_arr.null_count`` is
          zero
        - data: the second buffer converted, or ``Buffer.empty(0)`` when
          that buffer is falsy
        - string characters: the third buffer converted when ``pa_arr``
          is a ``pa.StringArray``, otherwise None
    """
    from cudf._lib.null_mask import bitmask_allocation_size_bytes

    arrow_buffers = pa_arr.buffers()
    n_rows = len(pa_arr)

    # The validity bitmap is only converted when nulls are present.
    mask_buf = None
    if pa_arr.null_count:
        n_mask_bytes = bitmask_allocation_size_bytes(n_rows)
        mask_buf = pyarrow_buffer_to_cudf_buffer(
            arrow_buffers[0], mask_size=n_mask_bytes
        )

    values = arrow_buffers[1]
    data_buf = (
        pyarrow_buffer_to_cudf_buffer(values) if values else Buffer.empty(0)
    )

    # Only string arrays carry a third buffer holding the characters.
    chars_buf = (
        pyarrow_buffer_to_cudf_buffer(arrow_buffers[2])
        if isinstance(pa_arr, pa.StringArray)
        else None
    )

    return (n_rows, pa_arr.offset, mask_buf, data_buf, chars_buf)
Exemplo n.º 2
0
def random_bitmask(size):
    """
    Generate random bytes to serve as a bitmask covering ``size`` bits.

    Parameters
    ----------
    size : int
        number of bits

    Returns
    -------
    numpy.ndarray
        Array of dtype "i1" whose length is
        ``bitmask_allocation_size_bytes(size)``; each element is a random
        byte reinterpreted as a signed 8-bit integer.
    """
    sz = bitmask_allocation_size_bytes(size)
    # ``high`` is exclusive in randint, so 256 is required for the byte
    # 0xFF (all eight bits set) to be a possible outcome.
    data = np.random.randint(0, 256, dtype="u1", size=sz)
    return data.view("i1")