def __init__(
        self,
        X,
        y=None,
        device=None,
        length=None,
):
    """Store the data and determine the length of the dataset.

    Parameters
    ----------
    X
      Input data; kept on the instance as ``self.X``.

    y (default=None)
      Target data; kept on the instance as ``self.y``.

    device (default=None)
      Deprecated. A non-``None`` value only triggers a
      ``DeprecationWarning``; the value itself is ignored.

    length (default=None)
      Explicit length of the dataset. When given, it is used as is
      and the lengths of ``X`` and ``y`` are not inspected.

    Raises
    ------
    ValueError
      If ``length`` is not given, ``y`` is not ``None`` and the
      lengths of ``X`` and ``y`` differ.

    """
    # TODO: Remove warning in release 0.4
    if device is not None:
        warnings.warn(
            "device is no longer needed by Dataset and will be ignored.",
            DeprecationWarning)

    self.X = X
    self.y = y

    self.X_indexing = check_indexing(X)
    self.y_indexing = check_indexing(y)
    self.X_is_ndframe = is_pandas_ndframe(X)

    if length is None:
        # No explicit length: infer it from X and make sure that y,
        # when present, agrees with it.
        # pylint: disable=invalid-name
        len_X = get_len(X)
        if y is not None and get_len(y) != len_X:
            raise ValueError("X and y have inconsistent lengths.")
        length = len_X

    self._len = length
def __getitem__(self, i):
    """Return the transformed sample(s) selected by ``i``.

    ``self.X`` and ``self.y`` are indexed with ``multi_indexing`` and
    the resulting pair is passed through ``self.transform``. If ``y``
    is ``None``, ``None`` is handed to ``transform`` in its place.

    """
    features = self.X
    target = self.y

    if is_pandas_ndframe(features):
        # Turn the frame into a dict that maps each key to a 2-dim
        # column array of shape (n, 1) before indexing.
        features = {
            key: features[key].values.reshape(-1, 1) for key in features
        }

    Xi = multi_indexing(features, i)
    yi = None if target is None else multi_indexing(target, i)
    return self.transform(Xi, yi)
def __init__(
        self,
        X,
        y=None,
        length=None,
):
    """Store the data and determine the length of the dataset.

    Parameters
    ----------
    X
      Input data; kept on the instance as ``self.X``.

    y (default=None)
      Target data; kept on the instance as ``self.y``.

    length (default=None)
      Explicit length of the dataset. When given, it is used as is
      and the lengths of ``X`` and ``y`` are not inspected.

    Raises
    ------
    ValueError
      If ``length`` is not given, ``y`` is not ``None`` and the
      lengths of ``X`` and ``y`` differ.

    """
    self.X, self.y = X, y

    self.X_indexing = check_indexing(X)
    self.y_indexing = check_indexing(y)
    self.X_is_ndframe = is_pandas_ndframe(X)

    # An explicitly passed length wins; no consistency check is made.
    if length is not None:
        self._len = length
        return

    n_samples = get_len(X)
    if y is not None:
        n_targets = get_len(y)
        if n_targets != n_samples:
            raise ValueError("X and y have inconsistent lengths.")
    self._len = n_samples
def to_tensor(
        X: Union[torch.Tensor, SliceDict],
        device: Union[torch.device, str],
        dtype: Optional[torch.dtype] = None) -> Union[torch.Tensor, SliceDict]:
    """
    Convert ``X`` to a tensor, recursing into containers.

    Differs slightly from skorch's to_tensor: (a) pandas objects simply get
    their `values` extracted, (b) an optional dtype conversion is supported.

    * A dict is converted value by value and returned as a ``SliceDict``.
    * A list or tuple is converted element by element and rebuilt with the
      type of ``X``.
    * Anything else is passed to ``_to_tensor_base`` and, if ``dtype`` is
      given, converted with ``Tensor.to(dtype)``.
    """
    if isinstance(X, dict):
        converted = {}
        for key, value in X.items():
            converted[key] = to_tensor(value, device=device, dtype=dtype)
        return SliceDict(**converted)

    if isinstance(X, (list, tuple)):
        container = type(X)
        return container(
            to_tensor(item, device=device, dtype=dtype) for item in X)

    # Leaf value: unwrap pandas objects before the base conversion.
    if is_pandas_ndframe(X):
        X = X.values

    result = _to_tensor_base(X=X, device=device)
    return result if dtype is None else result.to(dtype)
def _is_regular(self, x):
    """Tell whether ``x`` is ``None``, a numpy array or a pandas NDFrame."""
    if x is None:
        return True
    if isinstance(x, np.ndarray):
        return True
    return is_pandas_ndframe(x)
def multi_indexing(data, i):
    """Perform indexing on multiple data structures.

    Currently supported data types:

    * numpy arrays
    * torch tensors
    * pandas NDFrame
    * a dictionary of the former three
    * a list/tuple of the former three

    ``i`` can be an integer, a slice, or a numpy array of integer or
    boolean dtype. Integer arrays of any width (e.g. ``int32``,
    ``int64``, ``uint8``) are accepted.

    Note that the values of a dictionary are indexed directly with
    ``value[i]``; they are not dispatched through this function again.

    Raises
    ------
    IndexError
      If ``i`` is a numpy array whose dtype is neither boolean nor
      integer.

    Example
    -------
    >>> multi_indexing(np.asarray([1, 2, 3]), 0)
    1
    >>> multi_indexing(np.asarray([1, 2, 3]), np.s_[:2])
    array([1, 2])
    >>> multi_indexing(torch.arange(0, 4), np.s_[1:3])
     1
     2
    [torch.FloatTensor of size 2]
    >>> multi_indexing([[1, 2, 3], [4, 5, 6]], np.s_[:2])
    [[1, 2], [4, 5]]
    >>> multi_indexing({'a': [1, 2, 3], 'b': [4, 5, 6]}, np.s_[-2:])
    {'a': [2, 3], 'b': [5, 6]}
    >>> multi_indexing(
    ...     pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}), np.s_[1:3])
       a  b
    1  2  5
    2  3  6

    """
    # Normalize numpy index arrays to plain Python containers so that
    # every supported data structure can be indexed the same way.
    if isinstance(i, np.ndarray):
        if i.dtype == bool:
            i = tuple(j.tolist() for j in i.nonzero())
        elif np.issubdtype(i.dtype, np.integer):
            # Comparing against the builtin ``int`` would only match the
            # platform's default integer dtype and wrongly reject e.g.
            # int32 indices; accept every integer dtype instead.
            i = i.tolist()
        else:
            raise IndexError("arrays used as indices must be of integer "
                             "(or boolean) type")

    if isinstance(data, dict):
        # dictionary of containers
        return {k: v[i] for k, v in data.items()}

    if isinstance(data, (list, tuple)):
        # list or tuple of containers
        try:
            return [multi_indexing(x, i) for x in data]
        except TypeError:
            # The elements are not indexable containers themselves, so
            # fall through and index ``data`` as a flat sequence.
            pass

    if is_pandas_ndframe(data):
        # pandas NDFrame
        return data.iloc[i]

    # torch tensor, numpy ndarray, list
    if isinstance(i, (int, slice)):
        return data[i]
    return safe_indexing(data, i)