def check_data(self, X, y):
    """Validate that the input data can feed the default loader.

    When no targets are supplied (``y=None``), ``X`` is not already a
    ``Dataset``, and the stock ``DataLoader`` is still in use, training
    cannot produce target batches, so a ``ValueError`` is raised with
    instructions on how to resolve the situation.

    Parameters
    ----------
    X : input data
        Training data, or a ``Dataset`` that yields targets itself.
    y : target data or None
        Targets; may be None only if ``X`` is a ``Dataset`` or a custom
        ``iterator_train`` is configured.
    """
    y_missing = y is None
    x_is_dataset = is_dataset(X)
    default_loader = self.iterator_train is DataLoader
    if y_missing and not x_is_dataset and default_loader:
        msg = ("No y-values are given (y=None). You must either supply a "
               "Dataset as X or implement your own DataLoader for "
               "training (and your validation) and supply it using the "
               "``iterator_train`` and ``iterator_valid`` parameters "
               "respectively.")
        raise ValueError(msg)
def get_dataset(self, X, y=None):
    """Get a dataset that contains the input data and is passed to
    the iterator.

    Override this if you want to initialize your dataset
    differently. If ``dataset__use_cuda`` is not set, use
    ``self.use_cuda`` instead.

    Parameters
    ----------
    X : input data, compatible with skorch.dataset.Dataset
      By default, you should be able to pass:

        * numpy arrays
        * torch tensors
        * pandas DataFrame or Series
        * a dictionary of the former three
        * a list/tuple of the former three
        * a Dataset

      If this doesn't work with your data, you have to pass a
      ``Dataset`` that can deal with the data.

    y : target data, compatible with skorch.dataset.Dataset
      The same data types as for ``X`` are supported. If your X is
      a Dataset that contains the target, ``y`` may be set to
      None.

    Returns
    -------
    dataset
      The initialized dataset.

    """
    # Datasets pass through untouched -- they already bundle X and y.
    if is_dataset(X):
        return X

    dataset = self.dataset
    already_initialized = not callable(dataset)
    kwargs = self._get_params_for('dataset')

    # Extra dataset kwargs make no sense once the dataset instance exists.
    if kwargs and already_initialized:
        raise TypeError("Trying to pass an initialized Dataset while "
                        "passing Dataset arguments ({}) is not "
                        "allowed.".format(kwargs))

    if already_initialized:
        return dataset

    # Fall back to the net-level cuda setting unless explicitly overridden.
    kwargs.setdefault('use_cuda', self.use_cuda)
    return dataset(X, y, **kwargs)
def check_data(self, X, y):
    """Validate the input and, when possible, infer the class labels.

    Raises ``ValueError`` when ``y`` is None while the default
    ``DataLoader`` is in use and ``X`` is not a ``Dataset``, since the
    default loader cannot generate targets. If targets are present,
    the unique values are stored in ``classes_inferred_``.
    """
    cannot_infer_y = (
        y is None
        and not is_dataset(X)
        and self.iterator_train is DataLoader
    )
    if cannot_infer_y:
        msg = ("No y-values are given (y=None). You must either supply a "
               "Dataset as X or implement your own DataLoader for "
               "training (and your validation) and supply it using the "
               "``iterator_train`` and ``iterator_valid`` parameters "
               "respectively.")
        raise ValueError(msg)

    if y is not None:
        # Record the distinct class labels seen in the training targets.
        # pylint: disable=attribute-defined-outside-init
        self.classes_inferred_ = np.unique(y)
def forward_iter(self, X, training=False, location='cpu'):
    """Yield outputs of module forward calls on each batch of data.

    The storage location of the yielded tensors is determined by the
    ``location`` parameter.

    Parameters
    ----------
    X : input data, compatible with skorch.dataset.Dataset
      By default, you should be able to pass:

        * numpy arrays
        * torch tensors
        * pandas DataFrame or Series
        * a dictionary of the former three
        * a list/tuple of the former three
        * a Dataset

      If this doesn't work with your data, you have to pass a
      ``Dataset`` that can deal with the data.

    training : bool (default=False)
      Whether to set the module to train mode or not.

    location : string (default='cpu')
      The location to store each inference result on. This defaults
      to CPU memory since there is generally more memory available
      there. For performance reasons this might be changed to a
      specific CUDA device, e.g. 'cuda:0'.

    Yields
    ------
    yp : torch tensor
      Result from a forward call on an individual batch.

    """
    # Wrap raw data in a dataset unless the caller already provided one.
    if is_dataset(X):
        dataset = X
    else:
        dataset = self.get_dataset(X)
    iterator = self.get_iterator(dataset, training=training)

    # Moves each result tensor to the requested storage location.
    relocate = partial(
        torch.serialization.default_restore_location, location=location)

    for Xi, _ in iterator:
        yp = self.evaluation_step(Xi, training=training)
        if isinstance(yp, tuple):
            yield tuple(relocate(part) for part in yp)
        else:
            yield relocate(yp)
def check_data(self, X, y):
    """Validate regression input data and target shape.

    Raises ``ValueError`` when no targets are supplied while the
    default ``DataLoader`` is in use and ``X`` is not a ``Dataset``
    (the default loader cannot generate targets on its own), and when
    the target data is 1-dimensional, since regression targets are
    expected to be 2-dimensional with the second dimension sized by
    the number of regression targets.
    """
    if ((y is None) and
            (not is_dataset(X)) and
            (self.iterator_train is DataLoader)):
        raise ValueError("No y-values are given (y=None). You must "
                         "implement your own DataLoader for training "
                         "(and your validation) and supply it using the "
                         "``iterator_train`` and ``iterator_valid`` "
                         "parameters respectively.")
    elif y is None:
        # The user implements its own mechanism for generating y.
        return

    if get_dim(y) == 1:
        # BUG FIX: the original message had an unbalanced parenthesis
        # ("(e.g. y = y.reshape(-1, 1)."); closed it properly.
        msg = (
            "The target data shouldn't be 1-dimensional but instead have "
            "2 dimensions, with the second dimension having the same size "
            "as the number of regression targets (usually 1). Please "
            "reshape your target data to be 2-dimensional "
            "(e.g. y = y.reshape(-1, 1)).")
        raise ValueError(msg)
def check_data(self, X, y):
    """Validate input data and reject 1-dimensional targets.

    Raises ``ValueError`` when no targets are supplied while the
    default ``DataLoader`` is in use and ``X`` is not a ``Dataset``,
    and when the target data is 1-dimensional (see the comment below
    for why the latter is a problem).
    """
    if (
            (y is None) and
            (not is_dataset(X)) and
            (self.iterator_train is DataLoader)
    ):
        raise ValueError("No y-values are given (y=None). You must "
                         "implement your own DataLoader for training "
                         "(and your validation) and supply it using the "
                         "``iterator_train`` and ``iterator_valid`` "
                         "parameters respectively.")
    elif y is None:
        # The user implements its own mechanism for generating y.
        return

    # The problem with 1-dim float y is that the pytorch DataLoader will
    # somehow upcast it to DoubleTensor
    if get_dim(y) == 1:
        # BUG FIX: the original message had an unbalanced parenthesis
        # ("(e.g. y.reshape(-1, 1)."); closed it properly.
        raise ValueError("The target data shouldn't be 1-dimensional; "
                         "please reshape (e.g. y.reshape(-1, 1)).")
def check_data(self, X, y):
    """Run the parent checks, then require 1-dimensional targets.

    The dimensionality check is skipped when ``X`` is a ``Dataset``,
    in which case the targets are assumed to come from the dataset
    itself.
    """
    super().check_data(X, y)
    if is_dataset(X):
        return
    if get_dim(y) != 1:
        raise ValueError("The target data should be 1-dimensional.")