def __getitem__(self, arg):
    """
    Label-based selection on the wrapped Series (``self._sr``).

    Parameters
    ----------
    arg : scalar, slice, Series, or array-like
        * list / ``np.ndarray`` / ``pd.Series`` / ``range`` / ``Index`` /
          ``DeviceNDArray`` inputs are first converted to a ``Series``.
        * A boolean ``Series`` is forwarded to ``self._sr.iloc`` as a mask.
        * Any other ``Series`` is treated as a sequence of labels; each
          label is looked up with ``self._sr.loc[s:s]`` and the pieces are
          appended in order.
        * A single value is resolved to a position with
          ``index.find_label_range(arg, None)[0]``.
        * A slice is resolved to positions with
          ``index.find_label_range(arg.start, arg.stop)``; ``arg.step`` is
          applied to the resulting positional slice.

    Returns
    -------
    Whatever the underlying ``self._sr.iloc[...]`` lookup returns, or the
    ``Series`` accumulated from the per-label lookups.

    Raises
    ------
    NotImplementedError
        If ``arg`` is none of the supported kinds.
    """
    from cudf.dataframe.series import Series
    from cudf.dataframe.index import Index

    if isinstance(
            arg, (list, np.ndarray, pd.Series, range, Index, DeviceNDArray)):
        # Use len() rather than truthiness: the truth value of an ndarray
        # or pandas Series is ambiguous.
        if len(arg) == 0:
            arg = Series(np.array([], dtype='int32'))
        else:
            arg = Series(arg)

    if isinstance(arg, Series):
        # ``np.bool`` was merely an alias of the builtin ``bool``; it was
        # deprecated in NumPy 1.20 and removed in 1.24, so referencing it
        # raises AttributeError there. Comparing against ``bool`` directly
        # is equivalent and works on every NumPy version.
        if arg.dtype in (bool, np.bool_):
            return self._sr.iloc[arg]
        # To do this efficiently we need a solution to
        # https://github.com/rapidsai/cudf/issues/1087
        out = Series([], dtype=self._sr.dtype,
                     index=self._sr.index.__class__([]))
        for s in arg:
            out = out.append(self._sr.loc[s:s], ignore_index=False)
        return out
    elif is_single_value(arg):
        found_index = self._sr.index.find_label_range(arg, None)[0]
        return self._sr.iloc[found_index]
    elif isinstance(arg, slice):
        start_index, stop_index = self._sr.index.find_label_range(
            arg.start, arg.stop)
        return self._sr.iloc[start_index:stop_index:arg.step]
    else:
        raise NotImplementedError(
            ".loc not implemented for label type {}".format(
                type(arg).__name__))
def _downcast_to_series(self, df, arg): """ "Downcast" from a DataFrame to a Series based on Pandas indexing rules """ nrows, ncols = df.shape # determine the axis along which the Series is taken: if nrows == 1 and ncols == 1: if not is_single_value(arg[0]): axis = 1 else: axis = 0 elif nrows == 1: axis = 0 elif ncols == 1: axis = 1 else: raise ValueError("Cannot downcast DataFrame selection to Series") # take series along the axis: if axis == 1: return df[df.columns[0]] else: df = _normalize_dtypes(df) sr = df.T return sr[sr.columns[0]]
def fillna(self, fill_value, inplace=False):
    """
    Replace null entries of this column with ``fill_value``.

    Parameters
    ----------
    fill_value : scalar or column-like
        A single value is converted with ``np.datetime64(fill_value, 'ms')``;
        anything else is converted with ``columnops.as_column`` (NaNs are
        not treated as nulls).
    inplace : bool, default False
        Forwarded to ``self._mimic_inplace`` together with the result.

    Returns
    -------
    Whatever ``self._mimic_inplace(result, inplace)`` returns.
    """
    if not is_single_value(fill_value):
        replacement = columnops.as_column(fill_value, nan_as_null=False)
    else:
        replacement = np.datetime64(fill_value, 'ms')

    filled = cpp_replace.apply_replace_nulls(self, replacement)
    # NOTE(review): replace(mask=None) presumably drops the null mask now
    # that the nulls have been filled -- confirm against Column.replace.
    return self._mimic_inplace(filled.replace(mask=None), inplace)
def _is_scalar_access(self, arg): """ Determine if we are accessing a single value (scalar) """ if isinstance(arg, str): return False if not hasattr(arg, '__len__'): return False for obj in arg: if not is_single_value(obj): return False return True
def _can_downcast_to_series(self, df, arg): """ This method encapsulates the logic used to determine whether or not the result of a loc/iloc operation should be "downcasted" from a DataFrame to a Series """ nrows, ncols = df.shape if nrows == 1: if type(arg[0]) is slice: if not is_single_value(arg[1]): return False dtypes = df.dtypes.values.tolist() all_numeric = all( [pd.api.types.is_numeric_dtype(t) for t in dtypes]) all_identical = dtypes.count(dtypes[0]) == len(dtypes) if all_numeric or all_identical: return True if ncols == 1: if type(arg[1]) is slice: if not is_single_value(arg[0]): return False return True return False
def _get_column_selection(self, arg):
    """
    Translate a label-based column selector into the columns to select.

    Parameters
    ----------
    arg : scalar, slice, or other
        * A single value is wrapped in a one-element list.
        * A slice is expanded to the list of column labels from
          ``arg.start`` through ``arg.stop`` *inclusive*, in the order
          they appear in ``self._df.columns``. A missing start/stop
          defaults to the first/last column. ``arg.step`` is not used.
        * Anything else is returned unchanged.

    Returns
    -------
    list or the original ``arg``
        For a slice: the matching labels; an empty list if the frame has
        no columns, if ``start`` is never encountered, or if ``stop``
        occurs before ``start``.
    """
    if is_single_value(arg):
        return [arg]
    if not isinstance(arg, slice):
        return arg

    columns = self._df.columns
    # Guard the open-ended defaults below: indexing [0] / [-1] on an empty
    # column set would raise IndexError, while an empty selection is the
    # natural answer. len() is used because the truth value of an index
    # object may be ambiguous.
    if len(columns) == 0:
        return []

    first = columns[0] if arg.start is None else arg.start
    last = columns[-1] if arg.stop is None else arg.stop

    selected = []
    inside = False
    for name in columns:
        if name == first:
            inside = True
        if inside:
            selected.append(name)
        # Checked after the append so the stop label itself is included.
        if name == last:
            break
    return selected
def _get_column_selection(self, arg):
    """
    Translate a column selector into column labels by indexing
    ``self._df.columns`` with ``arg``.

    A single-value ``arg`` yields a one-element list holding the label
    found at ``arg``; any other ``arg`` yields ``self._df.columns[arg]``
    as-is.
    """
    columns = self._df.columns
    if not is_single_value(arg):
        return columns[arg]
    return [columns[arg]]