def test_generate_single_sample(self):
    # A Sampler built from a train and a test tuple, without any cross
    # validation argument, is expected to yield exactly one sample.
    frame = pd.DataFrame({"a": np.arange(10)})
    train = (frame, frame, None)
    test = (frame, frame, None)

    sampler = Sampler(train, test)
    drawn = list(sampler.sample())

    self.assertEqual(1, len(drawn))
def test_generate_multiple_sample(self):
    # With the cross validation of RandomSequences(0, 0.5, 12) plugged in,
    # the Sampler is expected to yield 12 samples.
    frame = pd.DataFrame({"a": np.arange(10)})
    train = (frame, frame, None)
    test = (frame, frame, None)

    cross_validation = RandomSequences(0, 0.5, 12).cross_validation
    sampler = Sampler(train, test, cross_validation=cross_validation)
    drawn = list(sampler.sample())

    self.assertEqual(12, len(drawn))
def predict(self, sampler: Sampler, **kwargs) -> np.ndarray:
    """
    Run ``predict_sample`` on every sample the sampler yields and stack the results.

    :param sampler: source of samples; every yielded item is unpacked as a pair and
        element 0 of the pair's first member is handed to ``predict_sample``
    :param kwargs: accepted but not used by this method
    :return: the per-sample predictions stacked into one array with the first two
        axes swapped, so the sample axis ends up as axis 1
    """
    predictions = []
    for first, _ in sampler.sample():
        predictions.append(self.predict_sample(first[0]))

    # stacking puts the samples on axis 0; swap to get shape (rows, samples, ...)
    stacked = np.array(predictions)
    return stacked.swapaxes(0, 1)
def fit(self, sampler: Sampler, **kwargs) -> float:
    """
    Fit the model once per fold drawn from the sampler, for as long as it yields folds.

    :param sampler: a data generating process
        class:`pandas_ml_utils.ml.data.splitting.sampeling.Sampler`
    :param kwargs: forwarded unchanged to every ``fit_fold`` call
    :return: the average of the losses returned by ``fit_fold`` over all folds
    """
    # every sample is (train, test), each laid out as [features, labels, target, weights]
    fold_losses = []
    for fold in sampler.sample():
        train, test = fold[0], fold[1]
        fold_loss = self.fit_fold(
            train[0], train[1],  # train features, train labels
            test[0], test[1],    # test features, test labels
            train[3], test[3],   # train weights, test weights
            **kwargs
        )
        fold_losses.append(fold_loss)

    loss_array = np.array(fold_losses)
    return loss_array.mean()
def fit(self, sampler: Sampler, **kwargs) -> float:
    """
    Fit the model once per fold drawn from the sampler and keep the per-fold results.

    The list of ``fit_fold`` results is stored on ``self._history``.

    :param sampler: a data generating process
        class:`pandas_ml_utils.ml.data.splitting.sampeling.Sampler`
    :param kwargs: forwarded unchanged to every ``fit_fold`` call
    :return: the average over all folds of the minimum of element 0 of each
        ``fit_fold`` result (NaN is used for a fold whose element 0 is empty)
    """
    # every sample is (train, test), each laid out as [features, labels, target, weights]
    fold_results = []
    for fold_nr, fold in enumerate(sampler.sample()):
        train, test = fold[0], fold[1]
        fold_results.append(
            self.fit_fold(
                fold_nr,
                train[0], train[1],  # train features, train labels
                test[0], test[1],    # test features, test labels
                train[3], test[3],   # train weights, test weights
                **kwargs
            )
        )

    self._history = fold_results

    # this loss is used for hyper parameter tuning, so we take the average of
    # the minimum loss of each fold
    best_per_fold = []
    for result in fold_results:
        loss_values = result[0]  # presumably an array-like exposing .size and .min()
        if loss_values.size > 0:
            best_per_fold.append(loss_values.min())
        else:
            best_per_fold.append(np.nan)

    return np.array(best_per_fold).mean()