def predict(self, X):
    self.module_.eval()
    # predict in batches to keep memory usage bounded
    pred_means = []
    pred_vars = []
    for idx in np.arange(0, X.shape[0], self.batch_size):
        batch = X[idx: min(idx + self.batch_size, X.shape[0])]
        predictive_means, predictive_variances = self.module_.predict(
            to_tensor(batch, self.device))
        pred_means.append(predictive_means)
        pred_vars.append(predictive_variances)
    predictive_means = torch.cat(pred_means, dim=1)
    predictive_variances = torch.cat(pred_vars, dim=1)

    output_dim = int(self.module__output_size / 2)
    mean = predictive_means.mean(0)[..., :output_dim]
    epistemic_var = predictive_variances.mean(0)[..., :output_dim]
    # use softplus to be numerically stable and not depend on the
    # activation function of the network
    softplus = torch.nn.Softplus()
    aleatoric_var = softplus(predictive_means.mean(0)[..., output_dim:]) ** 2
    var = epistemic_var + aleatoric_var
    return np.stack(
        [to_numpy(mean), to_numpy(var), to_numpy(epistemic_var), to_numpy(aleatoric_var)], -1)
def combine_uncertainties(preds, output_size):
    # the network is assumed to predict the mean and the std of the aleatoric
    # uncertainty: first the means of all output dims, then the stds of all
    # output dims; to be used with the Heteroscedastic loss class

    # combined mean
    output_dim = int(output_size / 2)
    mean = preds[..., :output_dim].mean(dim=1)

    # use softplus to be numerically stable and not depend on the activation
    # function of the network
    softplus = torch.nn.Softplus()
    aleatoric_sampled = softplus(preds[..., output_dim:])

    # combined approximate variance, following:
    # Kendall, A., & Gal, Y. (2017). What uncertainties do we need in Bayesian
    # deep learning for computer vision? In Advances in Neural Information
    # Processing Systems (pp. 5574-5584).
    # also:
    # Lakshminarayanan, B., Pritzel, A., & Blundell, C. (2017). Simple and
    # scalable predictive uncertainty estimation using deep ensembles.
    # In Advances in Neural Information Processing Systems (pp. 6402-6413).
    var = (preds[..., :output_dim] ** 2).mean(dim=1) - mean ** 2 \
        + (aleatoric_sampled ** 2).mean(dim=1)
    epistemic_std = preds[..., :output_dim].std(dim=1)
    aleatoric_std = aleatoric_sampled.mean(dim=1)
    return np.stack([
        to_numpy(mean),
        to_numpy(var),
        to_numpy(epistemic_std ** 2),
        to_numpy(aleatoric_std ** 2),
    ], -1)
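# A minimal, self-contained sketch of calling `combine_uncertainties` above.
# The ensemble shape (batch, n_members, 2 * output_dim), the random inputs,
# and the skorch import are assumptions made for illustration only.
import numpy as np
import torch
from skorch.utils import to_numpy

batch, n_members, output_dim = 4, 8, 3
# fake ensemble predictions: predicted means followed by (pre-softplus) stds
preds = torch.randn(batch, n_members, 2 * output_dim)
combined = combine_uncertainties(preds, output_size=2 * output_dim)
# last axis: mean, total variance, epistemic variance, aleatoric variance
assert combined.shape == (batch, output_dim, 4)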
def get_loss(self, y_pred, y_true, X=None, training=False):
    y_true = to_tensor(y_true, device='cpu')
    loss_a = torch.abs(y_true.float() - y_pred[:, 1]).mean()
    loss_b = ((y_true.float() - y_pred[:, 1]) ** 2).mean()
    if training:
        self.history.record_batch('loss_a', to_numpy(loss_a))
        self.history.record_batch('loss_b', to_numpy(loss_b))
    return loss_a + loss_b
def get_loss(self, y_pred, y_true, X=None, training=False):
    y_true = to_var(y_true, use_cuda=False)
    loss_a = torch.abs(y_true.float() - y_pred[:, 1]).mean()
    loss_b = ((y_true.float() - y_pred[:, 1]) ** 2).mean()
    if training:
        self.history.record_batch('loss_a', to_numpy(loss_a)[0])
        self.history.record_batch('loss_b', to_numpy(loss_b)[0])
    return loss_a + loss_b
def target_extractor(y):
    extracted = []
    for batch in y:
        energy_targets = to_numpy(batch[0])
        if len(batch) == 2:
            force_targets = to_numpy(batch[1])
            extracted.append([energy_targets, force_targets])
        elif len(batch) == 1:
            extracted.append([energy_targets, None])
    return extracted
def _predict_with_std(self, X):
    nonlin = self._get_predict_nonlinearity()
    y_preds, y_stds = [], []
    for yi in self.forward_iter(X, training=False):
        posterior = yi[0] if isinstance(yi, tuple) else yi
        y_preds.append(to_numpy(nonlin(posterior.mean)))
        y_stds.append(to_numpy(nonlin(posterior.stddev)))
    y_pred = np.concatenate(y_preds, 0)
    y_std = np.concatenate(y_stds, 0)
    return y_pred, y_std
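# A hedged usage sketch for `_predict_with_std`: turning the returned mean and
# standard deviation into a rough 95% interval. The estimator name
# (`gp_regressor`) and the input array are hypothetical stand-ins, not part of
# the original code.
import numpy as np

X = np.random.rand(32, 5).astype('float32')
y_pred, y_std = gp_regressor._predict_with_std(X)  # assumed already fitted
lower = y_pred - 1.96 * y_std
upper = y_pred + 1.96 * y_std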
def test_dropout(self, net_fit, data):
    # Note: does not test that dropout is really active during training.
    X = data[0]

    # check that dropout is not active by default
    y_proba = to_numpy(net_fit.forward(X))
    y_proba2 = to_numpy(net_fit.forward(X))
    assert np.allclose(y_proba, y_proba2, rtol=1e-7)

    # check that dropout can be activated
    y_proba = to_numpy(net_fit.forward(X, training=True))
    y_proba2 = to_numpy(net_fit.forward(X, training=True))
    assert not np.allclose(y_proba, y_proba2, rtol=1e-7)
def predict(self, X):
    self.module_.eval()
    output_size = self.module__output_size
    assert output_size % 2 == 0
    output_dim = int(output_size / 2)
    pred = to_tensor(self.predict_proba(to_tensor(X, device=self.device)), self.device)
    mean = pred[..., :output_dim]
    # use softplus to be numerically stable and not depend on the activation
    # function of the network
    softplus = torch.nn.Softplus()
    std = softplus(pred[..., output_dim:])
    return np.stack([to_numpy(mean), to_numpy(std ** 2)], -1)
def predict(self, X: Union[torch.Tensor, SliceDict], type: str = 'mean',
            *args, **kwargs) -> np.ndarray:
    """Return an attribute of the distribution (by default the mean) as a
    numpy array."""
    X = to_tensor(X, device=self.device, dtype=self.module_dtype_)
    y_out = []
    for params in self.forward_iter(X, training=False):
        batch_size = len(params[0])
        distribution_kwargs = dict(zip(self.distribution_param_names_, params))
        dist = self.distribution(**distribution_kwargs)
        yp = getattr(dist, type)
        if callable(yp):
            yp = yp(*args, **kwargs)
        yp = to_numpy(yp)
        if yp.shape[0] != batch_size:
            raise RuntimeError(
                f"`{self.distribution.__name__}.{type}` produced a tensor whose "
                f"leading dim is {yp.shape[0]}, expected {batch_size}."
            )
        y_out.append(yp)
    y_out = np.concatenate(y_out, 0)
    return y_out
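# A hedged usage sketch for the distribution-based `predict` above. The
# estimator name (`dist_regressor`) and the input array are hypothetical
# stand-ins; only the call pattern of selecting a distribution attribute via
# `type` mirrors the function itself.
import numpy as np

X = np.random.rand(16, 4).astype('float32')
y_mean = dist_regressor.predict(X)                  # defaults to the mean
y_var = dist_regressor.predict(X, type='variance')  # any tensor-valued attribute
y_std = dist_regressor.predict(X, type='stddev')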
def __call__(self, dataset, y=None, groups=None): bad_y_error = ValueError( "Stratified CV requires explicitely passing a suitable y.") if (y is None) and self.stratified: raise bad_y_error cv = self.check_cv(y) if self.stratified and not self._is_stratified(cv): raise bad_y_error # pylint: disable=invalid-name len_dataset = get_len(dataset) if y is not None: len_y = get_len(y) if len_dataset != len_y: raise ValueError("Cannot perform a CV split if dataset and y " "have different lengths.") args = (np.arange(len_dataset), ) if self._is_stratified(cv): args = args + (to_numpy(y), ) idx_train, idx_valid = next(iter(cv.split(*args, groups=groups))) dataset_train = torch.utils.data.Subset(dataset, idx_train) dataset_valid = torch.utils.data.Subset(dataset, idx_valid) return dataset_train, dataset_valid
def predict(self, X): """Where applicable, return class labels for samples in X. If the module's forward method returns multiple outputs as a tuple, it is assumed that the first output contains the relevant information and the other values are ignored. If all values are relevant, consider using :func:`~skorch.NeuralNet.forward` instead. Parameters ---------- X : input data, compatible with skorch.dataset.Dataset By default, you should be able to pass: * numpy arrays * torch tensors * pandas DataFrame or Series * a dictionary of the former three * a list/tuple of the former three * a Dataset If this doesn't work with your data, you have to pass a ``Dataset`` that can deal with the data. Returns ------- y_pred : numpy ndarray """ y_preds = [] for yp in self.forward_iter(X, training=False): yp = yp[0] if isinstance(yp, tuple) else yp y_preds.append(to_numpy(yp.max(-1)[-1])) y_pred = np.concatenate(y_preds, 0) return y_pred
def transform(self, X):
    out = []
    for outs in self.forward_iter(X, training=False):
        outs = outs[1] if isinstance(outs, tuple) else outs
        out.append(to_numpy(outs))
    transforms = np.concatenate(out, 0)
    return transforms
def __call__(self, dataset, y=None, groups=None): bad_y_error = ValueError( "Stratified CV requires explicitely passing a suitable y.") if (y is None) and self.stratified: raise bad_y_error cv = self.check_cv(y) if self.stratified and not self._is_stratified(cv): raise bad_y_error # pylint: disable=invalid-name len_dataset = get_len(dataset) if y is not None: len_y = get_len(y) if len_dataset != len_y: raise ValueError("Cannot perform a CV split if dataset and y " "have different lengths.") args = (np.arange(len_dataset),) if self._is_stratified(cv): args = args + (to_numpy(y),) idx_train, idx_valid = next(iter(cv.split(*args, groups=groups))) dataset_train = torch.utils.data.dataset.Subset(dataset, idx_train) dataset_valid = torch.utils.data.dataset.Subset(dataset, idx_valid) return dataset_train, dataset_valid
def predict_proba(self, X): """Where applicable, return probability estimates for samples. Parameters ---------- X : input data, compatible with skorch.dataset.Dataset By default, you should be able to pass: * numpy arrays * torch tensors * pandas DataFrame or Series * a dictionary of the former three * a list/tuple of the former three If this doesn't work with your data, you have to pass a ``Dataset`` that can deal with the data. Returns ------- y_proba : numpy ndarray """ y_probas = [] for yp in self.forward_iter(X, training=False): y_probas.append(to_numpy(yp)) y_proba = np.concatenate(y_probas, 0) return y_proba
def predict_proba(self, X): """Return the output of the module's forward method as a numpy array. If forward returns multiple outputs as a tuple, it is assumed that the first output contains the relevant information. The other values are ignored. Parameters ---------- X : input data, compatible with skorch.dataset.Dataset By default, you should be able to pass: * numpy arrays * torch tensors * pandas DataFrame or Series * a dictionary of the former three * a list/tuple of the former three If this doesn't work with your data, you have to pass a ``Dataset`` that can deal with the data. Returns ------- y_proba : numpy ndarray """ y_probas = [] for yp in self.forward_iter(X, training=False): yp = yp[0] if isinstance(yp, tuple) else yp y_probas.append(to_numpy(yp)) y_proba = np.concatenate(y_probas, 0) return y_proba
def compute_amplitude_gradients_for_X(model, X):
    device = next(model.parameters()).device
    ffted = np.fft.rfft(X, axis=2)
    amps = np.abs(ffted)
    phases = np.angle(ffted)
    amps_th = to_tensor(amps.astype(np.float32), device=device).requires_grad_(True)
    phases_th = to_tensor(phases.astype(np.float32), device=device).requires_grad_(True)

    fft_coefs = amps_th.unsqueeze(-1) * torch.stack(
        (torch.cos(phases_th), torch.sin(phases_th)), dim=-1)
    fft_coefs = fft_coefs.squeeze(3)
    iffted = torch.irfft(fft_coefs, signal_ndim=1, signal_sizes=(X.shape[2],))

    outs = model(iffted)

    n_filters = outs.shape[1]
    amp_grads_per_filter = np.full((n_filters,) + ffted.shape, np.nan, dtype=np.float32)
    for i_filter in range(n_filters):
        mean_out = torch.mean(outs[:, i_filter])
        mean_out.backward(retain_graph=True)
        amp_grads = to_numpy(amps_th.grad.clone())
        amp_grads_per_filter[i_filter] = amp_grads
        amps_th.grad.zero_()
    assert not np.any(np.isnan(amp_grads_per_filter))
    return amp_grads_per_filter
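# Note: `torch.irfft` was removed in PyTorch 1.8. A possible port of the
# amplitude/phase reconstruction above to the newer `torch.fft` module is
# sketched below; the helper name and its use here are assumptions, not part
# of the original code.
import torch

def reconstruct_signal_from_amp_phase(amps_th, phases_th, n_times):
    # build the complex spectrum from amplitude and phase, then apply the
    # inverse real FFT along the last (frequency) dimension
    fft_coefs = torch.polar(amps_th, phases_th)
    return torch.fft.irfft(fft_coefs, n=n_times, dim=-1)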
def __call__(self, X, y, groups=None):
    bad_y_error = ValueError("Stratified CV not possible with given y.")
    if (y is None) and self.stratified:
        raise bad_y_error

    cv = self.check_cv(y)
    if self.stratified and not self._is_stratified(cv):
        raise bad_y_error

    # pylint: disable=invalid-name
    len_X = get_len(X)
    if y is not None:
        len_y = get_len(y)
        if len_X != len_y:
            raise ValueError("Cannot perform a CV split if X and y "
                             "have different lengths.")

    args = (np.arange(len_X),)
    if self._is_stratified(cv):
        args = args + (to_numpy(y),)

    idx_train, idx_valid = next(iter(cv.split(*args, groups=groups)))
    X_train = multi_indexing(X, idx_train)
    X_valid = multi_indexing(X, idx_valid)
    y_train = None if y is None else multi_indexing(y, idx_train)
    y_valid = None if y is None else multi_indexing(y, idx_valid)
    return X_train, X_valid, y_train, y_valid
def test_schedule_is_effective(self, net_cls, classifier_module,
                               classifier_data, param_mapper):
    from skorch.utils import to_numpy, noop
    from skorch.utils import freeze_parameter, unfreeze_parameter

    def schedule(net):
        if len(net.history) == 1:
            return freeze_parameter
        elif len(net.history) == 2:
            return unfreeze_parameter
        return noop

    net = net_cls(
        classifier_module,
        max_epochs=1,
        callbacks=[
            param_mapper(
                ['sequential.*.weight', 'sequential.3.bias'],
                schedule=schedule,
            ),
        ])
    net.initialize()

    # epoch 1, freezing parameters
    net.partial_fit(*classifier_data)
    assert not net.module_.sequential[0].weight.requires_grad
    assert not net.module_.sequential[3].weight.requires_grad
    assert net.module_.sequential[0].bias.requires_grad
    assert not net.module_.sequential[3].bias.requires_grad

    dense0_weight_pre = to_numpy(net.module_.sequential[0].weight).copy()
    dense1_weight_pre = to_numpy(net.module_.sequential[3].weight).copy()
    dense0_bias_pre = to_numpy(net.module_.sequential[0].bias).copy()
    dense1_bias_pre = to_numpy(net.module_.sequential[3].bias).copy()

    # epoch 2, unfreezing parameters
    net.partial_fit(*classifier_data)
    assert net.module_.sequential[0].weight.requires_grad
    assert net.module_.sequential[3].weight.requires_grad
    assert net.module_.sequential[0].bias.requires_grad
    assert net.module_.sequential[3].bias.requires_grad

    # epoch 3, modifications should have been made
    net.partial_fit(*classifier_data)
    dense0_weight_post = to_numpy(net.module_.sequential[0].weight).copy()
    dense1_weight_post = to_numpy(net.module_.sequential[3].weight).copy()
    dense0_bias_post = to_numpy(net.module_.sequential[0].bias).copy()
    dense1_bias_post = to_numpy(net.module_.sequential[3].bias).copy()

    assert not np.allclose(dense0_weight_pre, dense0_weight_post)
    assert not np.allclose(dense1_weight_pre, dense1_weight_post)
    assert not np.allclose(dense0_bias_pre, dense0_bias_post)
    assert not np.allclose(dense1_bias_pre, dense1_bias_post)
def score(self, X, target):
    """Redefine the scoring method to match the Kaggle metric (log loss)."""
    y_preds = []
    for yp in self.forward_iter(X, training=False):
        y_preds.append(to_numpy(yp.sigmoid()))
    y_preds = np.concatenate(y_preds, 0)
    return log_loss(target, y_preds)
def on_epoch_end(self, net, **kwargs):
    epochs = len(net.history)
    if not (epochs % self.every_n_epochs):
        # snapshot the module parameters as numpy arrays
        p_t = dict(net.module_.named_parameters())
        for k, v in p_t.items():
            p_t[k] = to_numpy(v)
        self.params.append(p_t)
        self.epochs.append(epochs - 1)
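# A hedged sketch of the callback class that the `on_epoch_end` hook above
# might live in. The class name `ParamRecorder` and its constructor are
# assumptions made to give the hook some context; it builds on skorch's
# Callback base class, which does exist.
from skorch.callbacks import Callback
from skorch.utils import to_numpy


class ParamRecorder(Callback):
    def __init__(self, every_n_epochs=1):
        self.every_n_epochs = every_n_epochs

    def initialize(self):
        # containers filled by on_epoch_end
        self.params = []
        self.epochs = []
        return self

    def on_epoch_end(self, net, **kwargs):
        epochs = len(net.history)
        if not (epochs % self.every_n_epochs):
            p_t = {k: to_numpy(v) for k, v in net.module_.named_parameters()}
            self.params.append(p_t)
            self.epochs.append(epochs - 1)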
def test_forward(self, net_fit, data):
    X = data[0]
    n = len(X)
    y_forward = net_fit.forward(X)

    assert is_torch_data_type(y_forward)
    # expecting (number of samples, number of output units)
    assert y_forward.shape == (n, 2)

    y_proba = net_fit.predict_proba(X)
    assert np.allclose(to_numpy(y_forward), y_proba)
def check_data(self, X, y):
    if (
            (y is None) and
            (not is_dataset(X)) and
            (self.iterator_train is DataLoader)
    ):
        msg = ("No y-values are given (y=None). You must either supply a "
               "Dataset as X or implement your own DataLoader for "
               "training (and your validation) and supply it using the "
               "``iterator_train`` and ``iterator_valid`` parameters "
               "respectively.")
        raise ValueError(msg)
    if y is not None:
        # pylint: disable=attribute-defined-outside-init
        self.classes_inferred_ = np.unique(to_numpy(y))
def test_no_parameter_updates_when_norm_0(
        self, classifier_module, classifier_data):
    from copy import deepcopy
    from skorch import NeuralNetClassifier
    from skorch.callbacks import GradientNormClipping

    net = NeuralNetClassifier(
        classifier_module,
        callbacks=[('grad_norm', GradientNormClipping(0))],
        train_split=None,
        warm_start=True,
        max_epochs=1,
    )
    net.initialize()

    params_before = deepcopy(list(net.module_.parameters()))
    net.fit(*classifier_data)
    params_after = net.module_.parameters()
    for p0, p1 in zip(params_before, params_after):
        p0, p1 = to_numpy(p0), to_numpy(p1)
        assert np.allclose(p0, p1)
def predict_proba(self, X):
    nonlin = self._get_predict_nonlinearity()
    y_probas = []
    for yp in self.forward_iter(X, training=False):
        # normalize to a tuple so that single- and multi-output modules are
        # handled uniformly (the original line was a no-op)
        yp = yp if isinstance(yp, tuple) else (yp,)
        y_probas.append(tuple(to_numpy(nonlin(p)) for p in yp))
    # regroup per output and concatenate along the batch dimension
    stacked = list(zip(*y_probas))
    y_proba = [concatenate(array) for array in stacked]
    return y_proba
def _predict(self, X):
    # When return_std is False, turn on skip_posterior_variances -- this
    # avoids doing the math for the posterior variances altogether, which
    # will save a great deal of compute.
    nonlin = self._get_predict_nonlinearity()
    y_preds = []
    with gpytorch.settings.skip_posterior_variances():
        for yi in self.forward_iter(X, training=False):
            posterior = yi[0] if isinstance(yi, tuple) else yi
            y_preds.append(to_numpy(nonlin(posterior.mean)))
    y_pred = np.concatenate(y_preds, 0)
    return y_pred
def test_initialization_is_effective(self, net_cls, classifier_module,
                                     classifier_data, initializer, mod_init,
                                     weight_pattern):
    from torch.nn.init import constant_
    from skorch.utils import to_numpy

    module = classifier_module() if mod_init else classifier_module

    net = net_cls(
        module,
        lr=0,
        max_epochs=1,
        callbacks=[
            initializer(weight_pattern, partial(constant_, val=5)),
            initializer('sequential.3.bias', partial(constant_, val=10)),
        ])
    net.fit(*classifier_data)

    assert np.allclose(to_numpy(net.module_.sequential[0].weight), 5)
    assert np.allclose(to_numpy(net.module_.sequential[3].weight), 5)
    assert np.allclose(to_numpy(net.module_.sequential[3].bias), 10)
def test_unfreezing_is_effective(self, net_cls, classifier_module,
                                 classifier_data, freezer, unfreezer):
    from skorch.utils import to_numpy

    net = net_cls(
        classifier_module,
        max_epochs=1,
        callbacks=[
            freezer('sequential.*.weight'),
            freezer('sequential.3.bias'),
            unfreezer('sequential.*.weight', at=2),
            unfreezer('sequential.3.bias', at=2),
        ])
    net.initialize()

    # epoch 1, freezing parameters
    net.partial_fit(*classifier_data)
    assert not net.module_.sequential[0].weight.requires_grad
    assert not net.module_.sequential[3].weight.requires_grad
    assert net.module_.sequential[0].bias.requires_grad
    assert not net.module_.sequential[3].bias.requires_grad

    dense0_weight_pre = to_numpy(net.module_.sequential[0].weight).copy()
    dense1_weight_pre = to_numpy(net.module_.sequential[3].weight).copy()
    dense0_bias_pre = to_numpy(net.module_.sequential[0].bias).copy()
    dense1_bias_pre = to_numpy(net.module_.sequential[3].bias).copy()

    # epoch 2, unfreezing parameters
    net.partial_fit(*classifier_data)
    assert net.module_.sequential[0].weight.requires_grad
    assert net.module_.sequential[3].weight.requires_grad
    assert net.module_.sequential[0].bias.requires_grad
    assert net.module_.sequential[3].bias.requires_grad

    # epoch 3, modifications should have been made
    net.partial_fit(*classifier_data)
    dense0_weight_post = to_numpy(net.module_.sequential[0].weight).copy()
    dense1_weight_post = to_numpy(net.module_.sequential[3].weight).copy()
    dense0_bias_post = to_numpy(net.module_.sequential[0].bias).copy()
    dense1_bias_post = to_numpy(net.module_.sequential[3].bias).copy()

    assert not np.allclose(dense0_weight_pre, dense0_weight_post)
    assert not np.allclose(dense1_weight_pre, dense1_weight_post)
    assert not np.allclose(dense0_bias_pre, dense0_bias_post)
    assert not np.allclose(dense1_bias_pre, dense1_bias_post)
def check_cv(self, y):
    """Resolve which cross validation strategy is used."""
    y_arr = None
    if self.stratified:
        # Try to convert y to numpy for sklearn's check_cv; if the
        # conversion fails, fall back to using y as-is.
        try:
            y_arr = to_numpy(y)
        except (AttributeError, TypeError):
            y_arr = y

    if self._is_float(self.cv):
        return self._check_cv_float()
    return self._check_cv_non_float(y_arr)
def test_freezing_is_effective(self, net_cls, classifier_module,
                               classifier_data, freezer, mod_init, mod_kwargs):
    from skorch.utils import to_numpy

    module = classifier_module() if mod_init else classifier_module

    net = net_cls(
        module,
        max_epochs=2,
        callbacks=[
            freezer('sequential.*.weight'),
            freezer('sequential.3.bias'),
        ],
        **mod_kwargs)
    net.initialize()

    assert net.module_.sequential[0].weight.requires_grad
    assert net.module_.sequential[3].weight.requires_grad
    assert net.module_.sequential[0].bias.requires_grad
    assert net.module_.sequential[3].bias.requires_grad

    dense0_weight_pre = to_numpy(net.module_.sequential[0].weight).copy()
    dense1_weight_pre = to_numpy(net.module_.sequential[3].weight).copy()
    dense0_bias_pre = to_numpy(net.module_.sequential[0].bias).copy()
    dense1_bias_pre = to_numpy(net.module_.sequential[3].bias).copy()

    # use partial_fit to not re-initialize the module (weights)
    net.partial_fit(*classifier_data)

    dense0_weight_post = to_numpy(net.module_.sequential[0].weight).copy()
    dense1_weight_post = to_numpy(net.module_.sequential[3].weight).copy()
    dense0_bias_post = to_numpy(net.module_.sequential[0].bias).copy()
    dense1_bias_post = to_numpy(net.module_.sequential[3].bias).copy()

    assert not net.module_.sequential[0].weight.requires_grad
    assert not net.module_.sequential[3].weight.requires_grad
    assert net.module_.sequential[0].bias.requires_grad
    assert not net.module_.sequential[3].bias.requires_grad

    assert np.allclose(dense0_weight_pre, dense0_weight_post)
    assert np.allclose(dense1_weight_pre, dense1_weight_post)
    assert not np.allclose(dense0_bias_pre, dense0_bias_post)
    assert np.allclose(dense1_bias_pre, dense1_bias_post)
def predict_proba(self, X): """See ``NeuralNetClassifier.fit``. In contrast to ``NeuralNet.fit``, ``y`` is non-optional to avoid mistakenly forgetting about ``y``. However, ``y`` can be set to ``None`` in case it is derived dynamically from ``X``. """ y_logits = [] for yp in self.forward_iter(X, training=False): yp = yp[0] if isinstance(yp, tuple) else yp y_logits.append(to_numpy(yp)) y_logits = np.concatenate(y_logits, 0) return softmax(x=y_logits, axis=1)
def predict_with_window_inds_and_ys(self, dataset):
    preds = []
    i_window_in_trials = []
    i_window_stops = []
    window_ys = []
    for X, y, i in self.get_iterator(dataset, drop_index=False):
        i_window_in_trials.append(i[0].cpu().numpy())
        i_window_stops.append(i[2].cpu().numpy())
        preds.append(to_numpy(self.forward(X)))
        window_ys.append(y.cpu().numpy())
    preds = np.concatenate(preds)
    i_window_in_trials = np.concatenate(i_window_in_trials)
    i_window_stops = np.concatenate(i_window_stops)
    window_ys = np.concatenate(window_ys)
    return dict(
        preds=preds,
        i_window_in_trials=i_window_in_trials,
        i_window_stops=i_window_stops,
        window_ys=window_ys,
    )