def __getitem__(self, arg):
    """Index this column with a number or a slice.

    Parameters
    ----------
    arg : Number or slice
        A number is converted with ``int`` and forwarded to
        ``element_indexing``.  A slice is applied to ``self.data`` and,
        when ``null_count`` is positive, to the validity mask as well.

    Returns
    -------
    Whatever ``element_indexing`` returns for a numeric ``arg``, or the
    result of ``self.replace(...)`` for a slice.

    Raises
    ------
    NotImplementedError
        If ``arg`` is neither a number nor a slice, or if a slice with a
        step other than 1 is requested while nulls are present.
    """
    if isinstance(arg, Number):
        return self.element_indexing(int(arg))

    if not isinstance(arg, slice):
        raise NotImplementedError(type(arg))

    # No nulls: only the data needs slicing.
    if not self.null_count > 0:
        return self.replace(data=self.data[arg])

    # Strided slices are not supported on the masked path.
    if arg.step is not None and arg.step != 1:
        raise NotImplementedError(arg)

    subdata = self.data[arg]

    # The mask is expanded so that it can be indexed with the same slice
    # as the data, then packed again before being wrapped in a Buffer.
    bytemask = cudautils.expand_mask_bits(
        self.data.size,
        self.mask.to_gpu_array(),
    )
    submask = Buffer(cudautils.compact_mask_bytes(bytemask[arg]))
    return self.replace(data=subdata, mask=submask)
def to_pandas(self):
    """Copy the column data to the host and return it as a ``pd.Series``.

    When ``self.nullable`` is true, every position whose expanded mask
    entry is falsy is overwritten with ``None`` in the returned series.
    """
    host_values = self.data_array_view.copy_to_host()
    series = pd.Series(host_values)
    if not self.nullable:
        return series

    # One mask entry per element, viewed as booleans on the host.
    expanded = cudautils.expand_mask_bits(len(self), self.mask_array_view)
    valid = expanded.copy_to_host().astype(bool)
    series[~valid] = None
    return series
def to_arrow(self):
    """Build a ``pa.DictionaryArray`` from this column's categorical data.

    The indices come from the host copy of ``self.cat().codes``, the
    dictionary from ``self.cat().categories``, and the ordered flag from
    ``self.cat()._ordered``.  A mask is passed only when
    ``self.has_null_mask`` is true.
    """
    if self.has_null_mask:
        # The mask goes through from_arrays because PyArrow doesn't
        # support from_buffers for DictionaryArray yet.
        expanded = cudautils.expand_mask_bits(len(self), self.nullmask.mem)
        # expand_mask_bits appears to return int32 (reason unclear), so the
        # host copy is narrowed to int8 before handing it to Arrow.
        # NOTE(review): pyarrow documents a True mask entry as "null";
        # confirm the polarity of expand_mask_bits matches that.
        mask = pa.array(expanded.copy_to_host().astype('int8'))
    else:
        mask = None

    host_codes = self.cat().codes.data.mem.copy_to_host()
    indices = pa.array(host_codes)
    ordered = self.cat()._ordered
    dictionary = pa.array(self.cat().categories)

    return pa.DictionaryArray.from_arrays(
        indices=indices,
        dictionary=dictionary,
        mask=mask,
        from_pandas=True,
        ordered=ordered,
    )
def __getitem__(self, arg):
    """Index this column with a number, a slice, or an array-like.

    * Number: converted with ``int`` and passed to ``element_indexing``.
    * slice: applied to ``self.data``; when ``null_count`` is positive
      the mask is sliced too (unit step only).
    * anything else: converted with ``columnops.as_column``; integer
      indexers go to ``take`` and boolean indexers to
      ``apply_boolean_mask``.

    Raises ``NotImplementedError`` for a non-unit step on a column with
    nulls, or for an indexer that is neither integer nor boolean typed.
    """
    from cudf.dataframe import columnops

    if isinstance(arg, Number):
        return self.element_indexing(int(arg))

    if isinstance(arg, slice):
        if not self.null_count > 0:
            # No nulls: slicing the data is all that is required.
            return self.replace(data=self.data[arg])

        if arg.step is not None and arg.step != 1:
            raise NotImplementedError(arg)

        sliced_data = self.data[arg]

        # For "object" dtype ``size`` is called as a method, otherwise it
        # is read as an attribute.
        n_elements = (
            self.data.size() if self.dtype == "object" else self.data.size
        )
        expanded = cudautils.expand_mask_bits(
            n_elements, self.mask.to_gpu_array()
        )
        sliced_mask = Buffer(cudautils.compact_mask_bytes(expanded[arg]))
        return self.replace(data=sliced_data, mask=sliced_mask)

    indexer = columnops.as_column(arg)
    if len(indexer) == 0:
        # An empty indexer is rebuilt as int32 so it takes the integer path.
        indexer = columnops.as_column([], dtype="int32")

    if pd.api.types.is_integer_dtype(indexer.dtype):
        return self.take(indexer.data.mem)
    if pd.api.types.is_bool_dtype(indexer.dtype):
        return self.apply_boolean_mask(indexer)
    raise NotImplementedError(type(indexer))
def __getitem__(self, arg):
    """Index this column with a number, a slice, or an array-like.

    Parameters
    ----------
    arg : Number, slice, or array-like
        * Number: converted with ``int`` and passed to
          ``element_indexing``.
        * slice: categorical columns slice their ``codes`` and rebuild a
          column around them; other columns slice the data view and,
          when ``has_nulls`` is true, the mask (unit step only).
        * anything else: converted with ``column.as_column``; integer
          indexers go to ``take`` and boolean indexers to
          ``apply_boolean_mask``.

    Returns
    -------
    The element, or a new column as produced by ``build_column``,
    ``column_empty``, ``as_column``, ``take`` or ``apply_boolean_mask``.

    Raises
    ------
    NotImplementedError
        For a non-unit step on a column with nulls, or for an indexer
        that is neither integer nor boolean typed.
    """
    from cudf.core.column import column

    if isinstance(arg, Number):
        return self.element_indexing(int(arg))

    if not isinstance(arg, slice):
        arg = column.as_column(arg)
        if len(arg) == 0:
            # An empty indexer is rebuilt as int32 so it takes the
            # integer path below.
            arg = column.as_column([], dtype="int32")
        if pd.api.types.is_integer_dtype(arg.dtype):
            return self.take(arg)
        if pd.api.types.is_bool_dtype(arg.dtype):
            return self.apply_boolean_mask(arg)
        raise NotImplementedError(type(arg))

    if is_categorical_dtype(self):
        codes = self.codes[arg]
        return build_column(
            data=None,
            dtype=self.dtype,
            mask=codes.mask,
            children=(codes,),
        )

    start, stop, stride = arg.indices(len(self))
    # Any slice that selects nothing yields an empty column.  Checking
    # only ``start == stop`` misses cases such as ``[5:2]``, which would
    # otherwise reach the Buffer construction below with an empty view.
    if len(range(start, stop, stride)) == 0:
        return column_empty(0, self.dtype, masked=True)

    # ``slice.indices`` resolves a missing step to 1.
    has_step = stride != 1

    if self.has_nulls:
        if has_step:
            raise NotImplementedError(arg)
        slice_data = self.data_array_view[arg]
        # The mask is expanded so that it can be indexed with the same
        # slice as the data, then packed again.
        bytemask = cudautils.expand_mask_bits(
            self.size, self.mask_array_view
        )
        slice_mask = cudautils.compact_mask_bytes(bytemask[arg])
    else:
        slice_data = self.data_array_view[arg]
        slice_mask = None

    if self.dtype == "object":
        return as_column(slice_data)

    if has_step:
        slice_data = Buffer(cudautils.as_contiguous(slice_data))
    else:
        # data Buffer lifetime is tied to self:
        slice_data = Buffer(
            data=slice_data.device_ctypes_pointer.value,
            size=slice_data.nbytes,
            owner=self,
        )

    # mask Buffer lifetime is not:
    if slice_mask is not None:
        slice_mask = Buffer(slice_mask)

    return build_column(slice_data, self.dtype, mask=slice_mask)