def astype(self, dtype):
    """Return this object's data converted to ``dtype``.

    * If ``dtype`` already equals ``self.dtype``, ``self`` is returned
      as-is (no new object is created).
    * If ``dtype`` is a ``datetime64`` dtype and the current dtype is not
      ``'int64'``, the conversion is done in two steps: first to
      ``'int64'``, then to ``dtype``.
    * Otherwise a new ``Buffer`` wrapping the result of
      ``cudautils.astype(self.mem, dtype=dtype)`` is returned.
    """
    # Nothing to do when the dtype already matches.
    if self.dtype == dtype:
        return self

    # datetime64 targets are only converted directly from int64, so any
    # other source dtype takes an intermediate hop through int64.
    via_int64 = self.dtype != 'int64' and np.issubdtype(dtype, np.datetime64)
    if via_int64:
        as_int64 = self.astype('int64')
        return as_int64.astype(dtype)

    converted_mem = cudautils.astype(self.mem, dtype=dtype)
    return Buffer(converted_mem)
def element_indexing(self, arg):
    """Select one or more elements of this column.

    Parameters
    ----------
    arg : Number, slice, list, numpy.ndarray or DeviceNDArray
        * Number: a single position. Negative values count from the end.
        * slice / list: forwarded unchanged to ``self._data[...]``.
        * numpy.ndarray: copied to the device with ``rmm.to_device`` and
          then handled like a ``DeviceNDArray``.
        * DeviceNDArray: cast to int32 and used as gather indices.

    Returns
    -------
    When the selection holds exactly one element, the first item of
    ``out.to_host()``; otherwise ``columnops.as_column(out)``.
    NOTE(review): this means a one-element slice/list/array selection
    also yields a scalar rather than a column. Kept as-is because
    callers may rely on it.

    Raises
    ------
    IndexError
        If a scalar index is out of range, including a negative index
        smaller than ``-len(self)``.
    NotImplementedError
        If ``arg`` is of an unsupported type.
    """
    if isinstance(arg, Number):
        size = len(self)
        index = int(arg)
        if index < 0:
            index += size
        # Check both bounds: an index below -size is still negative after
        # wrapping and must not be passed on to the underlying data.
        if not 0 <= index < size:
            raise IndexError
        out = self._data[index]
    elif isinstance(arg, (slice, list)):
        out = self._data[arg]
    elif isinstance(arg, np.ndarray):
        # Move host indices to the device and reuse the device-array path.
        return self.element_indexing(rmm.to_device(arg))
    elif isinstance(arg, DeviceNDArray):
        # NVStrings gather call expects an array of int32s
        indices = cudautils.astype(arg, np.dtype('int32'))
        if len(indices) > 0:
            gpu_ptr = get_ctype_ptr(indices)
            out = self._data.gather(gpu_ptr, len(indices))
        else:
            out = self._data.gather([])
    else:
        raise NotImplementedError(type(arg))

    if len(out) == 1:
        return out.to_host()[0]
    return columnops.as_column(out)
def sort(self, segments, col_keys, col_vals):
    """Run libgdf's segmented radix sort over ``col_keys``/``col_vals``.

    ``segments`` supplies the per-segment begin offsets. The end offsets
    are the same values shifted by one position, with ``self.nelem`` as
    the final end. The work is issued in chunks of at most ``2 ** 16 - 1``
    offsets per libgdf call.

    Parameters
    ----------
    segments
        Array-like with a ``.size`` attribute; cast to uint32 before use.
    col_keys, col_vals
        Objects exposing ``cffi_view``, passed straight to libgdf.
    """
    offset_dtype = np.uint32
    chunk_limit = 2 ** 16 - 1
    num_segments = segments.size

    # A single device buffer of num_segments + 1 offsets backs both
    # views: begins are entries [0, n) and ends are entries [1, n].
    d_offsets = rmm.device_array(num_segments + 1, dtype=offset_dtype)
    d_begins = d_offsets[:-1]
    d_ends = d_offsets[1:]

    # Note: the explicit cast is required because .copy_to_device is
    # just a plain memcpy and performs no dtype conversion.
    d_begins.copy_to_device(cudautils.astype(segments, dtype=offset_dtype))
    d_ends[-1:].copy_to_device(np.require([self.nelem], dtype=offset_dtype))

    # The following handles the segment size limit due to the max CUDA
    # grid size: process the offsets in consecutive chunks.
    for start in range(0, num_segments, chunk_limit):
        stop = min(start + chunk_limit, num_segments)
        libgdf.gdf_segmented_radixsort_generic(
            self.plan,
            col_keys.cffi_view,
            col_vals.cffi_view,
            stop - start,
            unwrap_devary(d_begins[start:]),
            unwrap_devary(d_ends[start:]),
        )
def extend(self, array):
    """Append the contents of ``array`` after the currently used region.

    Capacity for ``array.size`` more elements is requested through
    ``self._sentry_capacity``. The data is then cast to ``self.dtype``,
    copied into ``self.mem`` starting at ``self.size``, and ``self.size``
    is advanced by the number of elements added.
    """
    count = array.size
    self._sentry_capacity(count)

    # Cast first: the device copy below does no dtype conversion itself.
    converted = cudautils.astype(array, dtype=self.dtype)

    start = self.size
    destination = self.mem[start:start + count]
    destination.copy_to_device(converted)
    self.size += count
def _find_first_and_last(self, value):
    """Return the first and last positions whose string equals ``value``.

    Each row is matched against the anchored pattern ``^value$``. The
    match flags are cast to int32, and the positions of the first and
    last ``1`` are looked up with ``find_first_value`` and
    ``find_last_value``.

    Parameters
    ----------
    value
        The value to look for. It is converted with ``str()`` and
        regex-escaped so that it is matched literally.

    Returns
    -------
    tuple
        ``(first, last)`` as returned by ``find_first_value(1)`` and
        ``find_last_value(1)``.
    """
    import re

    # Escape regex metacharacters: without this, a value such as "a.b"
    # or "c++" is interpreted as a pattern and matches the wrong rows.
    # NOTE(review): assumes the string backend's regex engine accepts
    # backslash-escaped literals as produced by re.escape — confirm.
    pattern = f"^{re.escape(str(value))}$"

    found_indices = self.str().contains(pattern).data.mem
    found_indices = cudautils.astype(found_indices, "int32")

    # Build the column once and reuse it for both lookups.
    found_column = columnops.as_column(found_indices)
    first = found_column.find_first_value(1)
    last = found_column.find_last_value(1)
    return first, last