def take(self, indices, ignore_index=False):
    """Gather the elements of this Series found at *indices*.

    Parameters
    ----------
    indices
        Positions to gather. They are wrapped in a ``Buffer`` and turned
        into a GPU array before use.
    ignore_index : bool, default False
        If true, the result carries a new ``RangeIndex`` sized to the
        number of positions; otherwise the current index is gathered with
        the same positions. Not consulted on the empty-input and
        object-dtype paths.

    Returns
    -------
    The Series produced by ``_copy_construct`` (or by ``self[...]`` for
    object dtype).
    """
    positions = Buffer(indices).to_gpu_array()

    # Nothing requested: return zero-length slices of data and index.
    if positions.size == 0:
        return self._copy_construct(data=self.data[:0], index=self.index[:0])

    # Object dtype is delegated to regular item access.
    if self.dtype == np.dtype("object"):
        return self[positions]

    gathered = cudautils.gather(
        data=self.data.to_gpu_array(),
        index=positions,
    )

    # Reorder the mask with the same positions when the column has one.
    mask = None
    if self._column.mask:
        taken_mask = self._get_mask_as_series().take(positions)
        mask = Buffer(taken_mask.as_mask())

    index = (
        RangeIndex(positions.size)
        if ignore_index
        else self.index.take(positions)
    )

    column = self._column.replace(data=Buffer(gathered), mask=mask)
    return self._copy_construct(data=column, index=index)
def unique(self, method='sort'):
    """Return the distinct values of this column.

    Parameters
    ----------
    method : str, default 'sort'
        Selects the algorithm used to compute the unique values. Only
        ``'sort'`` is implemented; the value is otherwise unused.

    Returns
    -------
    The result of ``self.replace`` holding the gathered values with no
    mask.

    Raises
    ------
    NotImplementedError
        If *method* is anything other than ``'sort'``.
    """
    # Compare by value: an identity test (`is not`) against a string
    # literal only works for interned strings and would reject an equal
    # string that was built at runtime.
    if method != 'sort':
        msg = 'non sort based unique() not implemented yet'
        raise NotImplementedError(msg)
    segs, sortedvals = self._unique_segments()
    # Pick the entries of the sorted values addressed by `segs`
    # (presumably the start of each run of equal values -- confirm against
    # `_unique_segments`).
    out = cudautils.gather(data=sortedvals, index=segs)
    return self.replace(data=Buffer(out), mask=None)
def take(self, indices):
    """Gather the index entries located at *indices*.

    Parameters
    ----------
    indices
        Array-like of positions; its dtype kind must be signed or
        unsigned integer.

    Returns
    -------
    A ``RangeIndex`` of length zero when *indices* is empty, otherwise a
    ``GenericIndex`` wrapping the gathered values.
    """
    assert indices.dtype.kind in 'iu'

    # Empty selection short-circuits to an empty range index.
    if indices.size == 0:
        return RangeIndex(indices.size)

    gathered = cudautils.gather(data=self.gpu_values, index=indices)
    column = self.as_column().replace(data=Buffer(gathered))
    return GenericIndex(column)
def value_counts(self, method='sort'):
    """Return the distinct values of this column and their counts.

    Parameters
    ----------
    method : str, default 'sort'
        Selects the algorithm. Only ``'sort'`` is implemented.

    Returns
    -------
    tuple
        ``(out_vals, out_counts)``: ``out_vals`` is built with
        ``self.replace`` from the gathered values (no mask) and
        ``out_counts`` is a ``NumericalColumn`` of dtype ``np.intp``.

    Raises
    ------
    NotImplementedError
        If *method* is anything other than ``'sort'``.
    """
    # Compare by value: an identity test (`is not`) against a string
    # literal only works for interned strings and would reject an equal
    # string that was built at runtime.
    if method != 'sort':
        msg = 'non sort based value_count() not implemented yet'
        raise NotImplementedError(msg)
    segs, sortedvals = self._unique_segments()
    # Return both values and their counts
    out1 = cudautils.gather(data=sortedvals, index=segs)
    out2 = cudautils.value_count(segs, len(sortedvals))
    out_vals = self.replace(data=Buffer(out1), mask=None)
    out_counts = NumericalColumn(data=Buffer(out2), dtype=np.intp)
    return out_vals, out_counts
def column_select_by_position(column, positions):
    """Select entries of *column* at the given *positions*.

    Parameters
    ----------
    column
        Source column; it must not contain nulls (asserted).
    positions
        Object whose ``data`` buffer holds the positions to select
        (documented upstream as a series of dtype int64).

    Returns
    -------
    tuple
        ``(selected_column, selected_positions)`` where the second item
        is a ``NumericalColumn`` built from the positions buffer.
    """
    from cudf.dataframe.numerical import NumericalColumn

    assert column.null_count == 0

    source_values = column.data.to_gpu_array()
    gathered = cudautils.gather(source_values, positions.data.to_gpu_array())
    selected_values = column.replace(data=Buffer(gathered))

    # Re-wrap the positions so they can be returned as a column.
    index_buffer = Buffer(positions.data.to_gpu_array())
    index_column = NumericalColumn(data=index_buffer,
                                   dtype=index_buffer.dtype)
    return selected_values, index_column
def take(self, indices):
    """Gather only the requested subset of this Index.

    Parameters
    ----------
    indices
        An array-like that maps to values contained in this Index; its
        dtype kind must be signed or unsigned integer.

    Returns
    -------
    A ``RangeIndex`` of length zero when *indices* is empty, otherwise
    the result of ``as_index`` applied to the gathered column.
    """
    assert indices.dtype.kind in 'iu'

    # Empty selection short-circuits to an empty range index.
    if indices.size == 0:
        return RangeIndex(indices.size)

    gathered = cudautils.gather(data=self.gpu_values, index=indices)
    column = self.as_column().replace(data=Buffer(gathered))
    return as_index(column)
def take(self, indices, ignore_index=False):
    """Return Column by taking values from the corresponding *indices*.

    Parameters
    ----------
    indices
        Positions to gather. They are wrapped in a ``Buffer`` and turned
        into a GPU array before use.
    ignore_index : bool, default False
        Not used by this method.

    Returns
    -------
    A column built with ``self.replace`` holding the gathered data and,
    when this column has a mask, the mask gathered with the same
    positions.
    """
    indices = Buffer(indices).to_gpu_array()
    # Handle zero size: an empty selection must produce an empty column.
    # Returning a copy of the whole column would hand back every element
    # although none was requested. Zero elements contain no nulls, so the
    # mask is dropped.
    if indices.size == 0:
        return self.replace(data=self._data[:0], mask=None)
    data = cudautils.gather(data=self._data.to_gpu_array(), index=indices)
    if self._mask:
        # Reorder the validity mask with the same positions as the data.
        mask = self._get_mask_as_column().take(indices).as_mask()
        mask = Buffer(mask)
    else:
        mask = None
    return self.replace(data=Buffer(data), mask=mask)
def sort_by_values(self, ascending=True, na_position="last"):
    """Sort this column and report the ordering that was applied.

    Parameters
    ----------
    ascending : bool, default True
        Forwarded to ``get_sorted_inds``.
    na_position : str, default "last"
        Forwarded to ``get_sorted_inds``.

    Returns
    -------
    tuple
        ``(col_keys, col_inds)``: the reordered values and a column
        built from the sorting indices.
    """
    sort_inds = get_sorted_inds(self, ascending, na_position)
    sorted_data = cudautils.gather(data=self.data.mem,
                                   index=sort_inds.data.mem)

    # Carry the mask along in the same order as the data.
    if self.mask:
        mask_column = self._get_mask_as_column()
        reordered = mask_column.take(sort_inds.data.to_gpu_array())
        sorted_mask = Buffer(reordered.as_mask())
    else:
        sorted_mask = None

    col_keys = self.replace(
        data=Buffer(sorted_data),
        mask=sorted_mask,
        null_count=self.null_count,
        dtype=self.dtype,
    )
    col_inds = self.replace(
        data=sort_inds.data,
        mask=sort_inds.mask,
        dtype=sort_inds.data.dtype,
    )
    return col_keys, col_inds