Exemplo n.º 1
0
def cffi_view_to_column_mem(cffi_view):
    """Wrap the buffers referenced by a cffi column view as device arrays.

    Returns a ``(data, mask)`` tuple.  ``data`` spans ``cffi_view.size``
    elements of the dtype returned by ``gdf_to_np_dtype(cffi_view.dtype)``.
    ``mask`` wraps the ``valid`` buffer, or is ``None`` when that pointer
    is falsy.  Each array is built with a finalizer obtained from
    ``rmm._make_finalizer`` for its own address.
    """
    data_addr = int(ffi.cast("uintptr_t", cffi_view.data))
    data = rmm.device_array_from_ptr(
        data_addr,
        nelem=cffi_view.size,
        dtype=gdf_to_np_dtype(cffi_view.dtype),
        finalizer=rmm._make_finalizer(data_addr, 0))

    # No validity buffer attached to the view: nothing to wrap.
    if not cffi_view.valid:
        return data, None

    mask_addr = int(ffi.cast("uintptr_t", cffi_view.valid))
    mask = rmm.device_array_from_ptr(
        mask_addr,
        nelem=calc_chunk_size(cffi_view.size, mask_bitsize),
        dtype=mask_dtype,
        finalizer=rmm._make_finalizer(mask_addr, 0))
    return data, mask
Exemplo n.º 2
0
def cffi_view_to_column_mem(cffi_view):
    """Convert a cffi column view into a ``(data, mask)`` pair.

    For ``libgdf.GDF_STRING_CATEGORY`` columns ``data`` is the strings
    object produced by ``to_strings()`` on the nvcategory bound to
    ``cffi_view.dtype_info.category``.  For every other dtype ``data`` is
    a device array over ``cffi_view.data``.  ``mask`` wraps the ``valid``
    buffer, or is ``None`` when that pointer is falsy.
    """

    def _validity_mask():
        # Shared by both paths: wrap the valid buffer only if one is set.
        if not cffi_view.valid:
            return None
        mask_addr = int(ffi.cast("uintptr_t", cffi_view.valid))
        return rmm.device_array_from_ptr(
            mask_addr,
            nelem=calc_chunk_size(cffi_view.size, mask_bitsize),
            dtype=mask_dtype,
            finalizer=rmm._make_finalizer(mask_addr, 0))

    column_dtype = cffi_view.dtype
    is_string_category = column_dtype == libgdf.GDF_STRING_CATEGORY
    data_addr = int(ffi.cast("uintptr_t", cffi_view.data))

    if not is_string_category:
        data = rmm.device_array_from_ptr(
            data_addr,
            nelem=cffi_view.size,
            dtype=gdf_to_np_dtype(column_dtype),
            finalizer=rmm._make_finalizer(data_addr, 0))
        return data, _validity_mask()

    # This array is never returned; it is created just to make sure the
    # memory behind the data pointer is properly freed.  It is kept in a
    # local so it lives until the function returns.
    category_codes = rmm.device_array_from_ptr(  # noqa: F841
        data_addr,
        nelem=cffi_view.size,
        dtype='int32',
        finalizer=rmm._make_finalizer(data_addr, 0))

    category_addr = int(
        ffi.cast("uintptr_t", cffi_view.dtype_info.category))
    category = nvcategory.bind_cpointer(category_addr)
    strings = category.to_strings()
    return strings, _validity_mask()
Exemplo n.º 3
0
Arquivo: _gdf.py Projeto: cuulee/cudf
def libgdf_join(col_lhs, col_rhs, on, how, method='sort'):
    """Join two sets of columns through the libgdf join entry points.

    Parameters
    ----------
    col_lhs, col_rhs : mapping of name -> column wrapper
        Each value must expose ``_column.cffi_view`` and ``_column.dtype``.
    on : sequence of names
        Key columns.  Every name is looked up in both ``col_lhs`` and
        ``col_rhs``.
    how : str
        Selects the join function from ``_join_how_api``; must be one of
        ``'left'``, ``'inner'`` or ``'outer'``.
    method : str, optional
        Selects the entry of ``_join_method_api`` that is passed to
        ``libgdf.gdf_context_view``.  Defaults to ``'sort'``.

    Returns
    -------
    (res, valids) : tuple of two lists
        One device array per result column: ``res`` holds the data
        buffers and ``valids`` the validity-mask buffers.  Result columns
        are ordered as: columns of ``col_lhs`` not in ``on``, then the
        ``on`` columns (in ``on`` order, typed after ``col_lhs``), then
        columns of ``col_rhs`` not in ``on``.

    Raises
    ------
    KeyError
        If ``how`` or ``method`` is missing from its lookup table.
    ValueError
        If ``how`` is not a supported join kind, or if a name in ``on``
        is not among the keys of ``col_rhs``.
    """
    joiner = _join_how_api[how]
    method_api = _join_method_api[method]

    # Reject unsupported join kinds before any native state is allocated.
    if how not in ['left', 'inner', 'outer']:
        msg = "new join api only supports left, inner or outer"
        raise ValueError(msg)

    gdf_context = ffi.new('gdf_context*')
    libgdf.gdf_context_view(gdf_context, 0, method_api, 0, 0, 0)

    # Column order of each side, computed once; used to translate the
    # names in `on` into positional indices.
    lhs_names = list(col_lhs.keys())
    rhs_names = list(col_rhs.keys())

    list_lhs = []
    list_rhs = []
    result_cols = []

    left_idx = []
    right_idx = []

    # Non-key columns of the left side come first in the result.
    for name, col in col_lhs.items():
        list_lhs.append(col._column.cffi_view)
        if name not in on:
            result_cols.append(columnview(0, None, dtype=col._column.dtype))

    # Key columns follow, taking their dtype from the left side.
    for name in on:
        result_cols.append(columnview(0, None,
                                      dtype=col_lhs[name]._column.dtype))
        left_idx.append(lhs_names.index(name))
        right_idx.append(rhs_names.index(name))

    # Non-key columns of the right side come last.
    for name, col in col_rhs.items():
        list_rhs.append(col._column.cffi_view)
        if name not in on:
            result_cols.append(columnview(0, None, dtype=col._column.dtype))

    num_cols_to_join = len(on)
    result_num_cols = len(list_lhs) + len(list_rhs) - num_cols_to_join

    joiner(list_lhs,
           len(list_lhs),
           left_idx,
           list_rhs,
           len(list_rhs),
           right_idx,
           num_cols_to_join,
           result_num_cols,
           result_cols,
           ffi.NULL,
           ffi.NULL,
           gdf_context)

    res = []
    valids = []

    # Wrap the data and valid pointers of every result column in device
    # arrays, each with a finalizer built for its own address.
    for col in result_cols:
        data_addr = int(ffi.cast("uintptr_t", col.data))
        res.append(rmm.device_array_from_ptr(
            ptr=data_addr,
            nelem=col.size,
            dtype=gdf_to_np_dtype(col.dtype),
            finalizer=rmm._make_finalizer(data_addr, 0)))

        valid_addr = int(ffi.cast("uintptr_t", col.valid))
        valids.append(rmm.device_array_from_ptr(
            ptr=valid_addr,
            nelem=calc_chunk_size(col.size, mask_bitsize),
            dtype=mask_dtype,
            finalizer=rmm._make_finalizer(valid_addr, 0)))

    return res, valids