Пример #1
0
    def test_generate_single_sample(self):
        """A plain Sampler without cross validation yields exactly one sample."""
        frame = pd.DataFrame({"a": np.arange(10)})
        train_set = (frame, frame, None)
        test_set = (frame, frame, None)
        sampler = Sampler(train_set, test_set)

        drawn = list(sampler.sample())

        self.assertEqual(1, len(drawn))
Пример #2
0
    def test_generate_multiple_sample(self):
        """A Sampler with a 12-fold RandomSequences cross validation yields 12 samples."""
        frame = pd.DataFrame({"a": np.arange(10)})
        train_set = (frame, frame, None)
        test_set = (frame, frame, None)
        cross_validation = RandomSequences(0, 0.5, 12).cross_validation
        sampler = Sampler(train_set, test_set, cross_validation=cross_validation)

        drawn = list(sampler.sample())

        self.assertEqual(12, len(drawn))
Пример #3
0
    def predict(self, sampler: Sampler, **kwargs) -> np.ndarray:
        """
        predict as many samples as we can sample from the sampler

        :param sampler: source of (train, test) draws; the train features (index 0) are predicted
        :return: predictions stacked with shape (rows, samples, ...)
        """
        per_sample = [self.predict_sample(train[0]) for train, _ in sampler.sample()]
        # stacking yields shape (samples, rows, ...); swap to (rows, samples, ...)
        return np.array(per_sample).swapaxes(0, 1)
Пример #4
0
    def fit(self, sampler: Sampler, **kwargs) -> float:
        """
        draws folds from the data generator as long as it yields new data and fits the model to one fold

        :param sampler: a data generating process class:`pandas_ml_utils.ml.data.splitting.sampeling.Sampler`
        :return: returns the average loss over all folds
        """

        # each sample is (train, test), both laid out as [features, labels, target, weights]
        fold_losses = []
        for train, test in sampler.sample():
            fold_losses.append(
                self.fit_fold(train[0], train[1], test[0], test[1],
                              train[3], test[3], **kwargs))

        return np.array(fold_losses).mean()
Пример #5
0
    def fit(self, sampler: Sampler, **kwargs) -> float:
        """
        draws folds from the data generator as long as it yields new data and fits the model to one fold

        :param sampler: a data generating process class:`pandas_ml_utils.ml.data.splitting.sampeling.Sampler`
        :return: returns the average loss over all folds
        """

        # each sample is (train, test), both laid out as [features, labels, target, weights]
        fold_results = []
        for fold, (train, test) in enumerate(sampler.sample()):
            fold_results.append(
                self.fit_fold(fold, train[0], train[1], test[0], test[1],
                              train[3], test[3], **kwargs))

        # keep the raw per-fold results around for later inspection
        self._history = fold_results

        # this loss is used for hyper parameter tuning so we take the average of the minimum loss of each fold
        per_fold_minimum = [result[0].min() if result[0].size > 0 else np.nan
                            for result in fold_results]
        return np.array(per_fold_minimum).mean()