示例#1
0
def apply_join(col_lhs, col_rhs, how, method='hash'):
    """Run a libgdf join and yield the left/right result index arrays.

    This is a generator that yields exactly once.  The two result columns
    are released with ``libgdf.gdf_column_free`` as soon as the generator
    is resumed, closed, or has an exception thrown into it, so the yielded
    arrays (which are built from the result columns' data pointers) must
    not be used after that point.
    NOTE(review): presumably this is meant to be driven through
    ``contextlib.contextmanager`` -- confirm against the call sites.

    Parameters
    ----------
    col_lhs, col_rhs : sequence
        Join key columns.  Every element must expose a ``cffi_view``
        attribute, and both sequences must have the same length.
    how : str
        Key into ``_join_how_api`` selecting the join function.  For
        ``'left'`` and ``'inner'`` all columns are passed to the join
        function together with the context; for any other value only the
        first column of each side is passed and no context is given.
    method : str, optional
        ``'hash'`` (default) or ``'sort'``; also used as the key into
        ``_join_method_api``.

    Yields
    ------
    tuple
        ``(left, right)`` -- two ``np.int32`` arrays created by
        ``_as_numba_devarray`` over the left and right result columns.

    Raises
    ------
    ValueError
        If ``col_lhs`` and ``col_rhs`` differ in length, or if ``method``
        is neither ``'hash'`` nor ``'sort'``.

    Notes
    -----
    Failed lookups of ``how`` / ``method`` in ``_join_how_api`` /
    ``_join_method_api`` happen before the ``method`` check and propagate
    unchanged.
    """
    if len(col_lhs) != len(col_rhs):
        msg = "Unequal #columns in list 'col_lhs' and list 'col_rhs'"
        raise ValueError(msg)

    joiner = _join_how_api[how]
    method_api = _join_method_api[method]
    gdf_context = ffi.new('gdf_context*')

    # The second argument differs per method (0 for hash, 1 for sort).
    # NOTE(review): presumably a "sorted input" flag -- confirm in libgdf.
    if method == 'hash':
        libgdf.gdf_context_view(gdf_context, 0, method_api, 0)
    elif method == 'sort':
        libgdf.gdf_context_view(gdf_context, 1, method_api, 0)
    else:
        msg = "method not supported"
        raise ValueError(msg)

    # Empty int32 columns that the join function fills in.
    col_result_l = columnview(0, None, dtype=np.int32)
    col_result_r = columnview(0, None, dtype=np.int32)

    if how in ('left', 'inner'):
        # Multi-column path: hand every key column to libgdf.
        list_lhs = [col.cffi_view for col in col_lhs]
        list_rhs = [col.cffi_view for col in col_rhs]
        joiner(len(col_lhs), list_lhs, list_rhs, col_result_l, col_result_r,
               gdf_context)
    else:
        # Single-column path: only the first key column of each side.
        joiner(col_lhs[0].cffi_view, col_rhs[0].cffi_view, col_result_l,
               col_result_r)

    def _indices_of(col_result):
        # Wrap the result column's data pointer as an int32 array.
        return _as_numba_devarray(
            intaddr=int(ffi.cast("uintptr_t", col_result.data)),
            nelem=col_result.size,
            dtype=np.int32)

    try:
        yield (_indices_of(col_result_l), _indices_of(col_result_r))
    finally:
        # Always release the result columns, even when the consumer raises
        # or abandons the generator; otherwise the buffers would leak.
        libgdf.gdf_column_free(col_result_l)
        libgdf.gdf_column_free(col_result_r)
示例#2
0
def _call_join_multi(api, ncols, col_left, col_right, ctxt):
    """Invoke a multi-column join function and return its index pairs.

    Two fresh result columns are created with ``new_column()``, filled by
    ``api``, copied out with ``_copy_int_col_to_arr`` and then released
    with ``libgdf.gdf_column_free``.

    Parameters
    ----------
    api : callable
        Join function, called as
        ``api(ncols, col_left, col_right, l_res, r_res, ctxt)``.
    ncols : int
        Number of key columns, forwarded to ``api`` unchanged.
    col_left, col_right
        Left and right key columns, forwarded to ``api`` unchanged.
    ctxt
        Join context, forwarded to ``api`` unchanged.

    Returns
    -------
    numpy.ndarray
        ``np.array([l_idx, r_idx])`` where ``l_idx`` and ``r_idx`` are the
        copies of the left and right result columns.
    """
    l_res = new_column()
    r_res = new_column()

    api(ncols, col_left, col_right, l_res, r_res, ctxt)

    try:
        l_idx = _copy_int_col_to_arr(l_res)
        r_idx = _copy_int_col_to_arr(r_res)
        return np.array([l_idx, r_idx])
    finally:
        # Release the result columns even if copying them out fails, so a
        # failed copy does not leak the buffers filled in by ``api``.
        libgdf.gdf_column_free(l_res)
        libgdf.gdf_column_free(r_res)