def digitize(
    column: ColumnBase, bins: np.ndarray, right: bool = False
) -> ColumnBase:
    """Map every value of ``column`` to the index of the bin it falls into.

    Parameters
    ----------
    column : Column
        Values to be binned.
    bins : Column-like
        1-D column-like object holding the bin edges. It must expose a
        ``dtype`` equal to ``column.dtype`` and should be monotonically
        increasing (this is not verified here).
    right : bool
        Indicates whether the interval contains the right or the left
        bin edge.

    Returns
    -------
    A column containing the bin indices.

    Raises
    ------
    ValueError
        If ``bins`` and ``column`` do not share a dtype, or if the column
        built from ``bins`` is nullable.
    """
    same_dtype = column.dtype == bins.dtype
    if not same_dtype:
        raise ValueError(
            "Digitize() expects bins and input column have the same dtype."
        )

    # Normalise the bin edges into a column so they can be handed to libcudf.
    edges = as_column(bins, dtype=bins.dtype)
    if edges.nullable:
        raise ValueError("`bins` cannot contain null entries.")

    indices = libcudf.sort.digitize(
        column.as_frame(), edges.as_frame(), right
    )
    return as_column(indices)
def digitize(column, bins, right=False):
    """Return the indices of the bins to which each value in column belongs.

    Parameters
    ----------
    column : Column
        Input column.
    bins : np.array
        1-D monotonically increasing array of bins with same type as
        `column`.
    right : bool
        Indicates whether interval contains the right or left bin edge.

    Returns
    -------
    The bin indices, as returned by ``as_column``.

    Raises
    ------
    AssertionError
        If `column` and `bins` do not have the same dtype.
    """
    # Raise explicitly instead of using a bare `assert`: assert statements
    # are stripped under `python -O`, which would let mismatched bins be
    # reinterpreted byte-wise below. AssertionError is kept so callers that
    # relied on the previous exception type are unaffected.
    if column.dtype != bins.dtype:
        raise AssertionError(
            "digitize() expects bins and input column to have the same dtype."
        )

    # View the bins as raw bytes for the Buffer; the element dtype is
    # re-attached when the NumericalColumn is built.
    bins_buf = Buffer(bins.view("|u1"))
    bin_col = NumericalColumn(data=bins_buf, dtype=bins.dtype)
    return as_column(
        libcudf.sort.digitize(column.as_frame(), bin_col.as_frame(), right)
    )