Exemplo n.º 1
0
    def __init__(self,
                 datamanager,
                 backend,
                 configuration=None,
                 with_predictions=False,
                 all_scoring_functions=False,
                 seed=1,
                 output_y_test=False,
                 num_run=None,
                 subsample=None):
        """Initialise the evaluator and split the training data once.

        Every argument is forwarded unchanged to the parent initialiser.
        Afterwards ``datamanager.data['X_train']`` and
        ``datamanager.data['Y_train']`` are passed to ``split_data`` and the
        four returned parts are stored on the instance as ``X_train``,
        ``X_optimization``, ``Y_train`` and ``Y_optimization``.
        """
        # Options handed to the parent class by keyword.
        parent_options = {
            'with_predictions': with_predictions,
            'all_scoring_functions': all_scoring_functions,
            'seed': seed,
            'output_y_test': output_y_test,
            'num_run': num_run,
            'subsample': subsample,
        }
        super(HoldoutEvaluator, self).__init__(
            datamanager, backend, configuration, **parent_options)

        # The split is told whether the task is one of CLASSIFICATION_TASKS.
        is_classification = datamanager.info['task'] in CLASSIFICATION_TASKS
        split_parts = split_data(datamanager.data['X_train'],
                                 datamanager.data['Y_train'],
                                 classification=is_classification)
        (self.X_train, self.X_optimization,
         self.Y_train, self.Y_optimization) = split_parts
Exemplo n.º 2
0
 def test_split_classification_many_imbalanced_classes(self):
     """Validation labels must never exceed 1 for the imbalanced fixture.

     The label vector holds eight 0s, eight 1s and one sample each of the
     classes 2, 3, 4 and 5.  It is shuffled and split ten times.
     """
     for _ in range(10):
         features = np.array([range(20)] * 2).transpose()
         labels = np.array([0] * 8 + [1] * 8 + [2, 3, 4, 5])
         np.random.shuffle(labels)
         split_parts = split_data(features, labels, classification=True)
         _train_X, _valid_X, _train_y, valid_y = split_parts
         # Only the two frequent classes may show up in the validation part.
         self.assertLessEqual(max(valid_y), 1)
Exemplo n.º 3
0
 def test_split_classification_many_imbalanced_classes(self):
     """Check that single-sample classes stay out of the validation split.

     The fixture has 20 samples: eight of class 0, eight of class 1 and one
     each of classes 2, 3, 4 and 5.  The labels are shuffled and split with
     ``classification=True`` ten times; each time the largest validation
     label must be at most 1.
     """
     for _ in range(10):
         X = np.array([range(20), range(20)]).transpose()
         y = np.array((0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3,
                       4, 5))
         # A fresh shuffle on every iteration varies where the rare
         # classes sit in the label vector.
         np.random.shuffle(y)
         X_train, X_valid, Y_train, Y_valid = split_data(
             X, y,
             classification=True)
         # The debugging print of the training data was removed: it
         # cluttered the test output and took no part in the check.
         self.assertLessEqual(max(Y_valid), 1)
Exemplo n.º 4
0
    def _save_ensemble_data(self, X, y):
        """Split the data and hand the ensemble targets to the backend.

        The work is bracketed by ``_start_task`` / ``_stop_task`` on
        ``self._stopwatch`` under the task name ``'LoadData'``.

        :param X: first argument forwarded to ``resampling.split_data``
        :param y: second argument forwarded to ``resampling.split_data``
        :return: None

        """
        stopwatch_label = 'LoadData'
        self._start_task(self._stopwatch, stopwatch_label)
        # Only the fourth element of the split is needed here.
        split_parts = resampling.split_data(X, y)
        _, _, _, ensemble_targets = split_parts
        self._backend.save_targets_ensemble(ensemble_targets)
        self._stop_task(self._stopwatch, stopwatch_label)
Exemplo n.º 5
0
    def _save_ensemble_data(self, X, y):
        """Store the targets used by the ensemble script.

        ``resampling.split_data(X, y)`` is called and the last of its four
        return values is saved through
        ``self._backend.save_targets_ensemble``.  The whole step is timed
        as the ``'LoadData'`` task on ``self._stopwatch``.

        :param X: data passed as the first argument to ``split_data``
        :param y: data passed as the second argument to ``split_data``
        :return: None

        """
        timed_task = 'LoadData'
        watch = self._stopwatch
        self._start_task(watch, timed_task)
        _unused_a, _unused_b, _unused_c, targets = resampling.split_data(X, y)
        self._backend.save_targets_ensemble(targets)
        self._stop_task(self._stopwatch, timed_task)
Exemplo n.º 6
0
    def _split_regular(self):
        """Check ``split_data`` on six samples labelled 0, 0, 0, 1, 1, 2.

        Four rows are expected in the training part and two in the
        validation part; the exact label order of both parts is pinned.
        """
        # Three repetitions of the two distinct feature rows.
        features = np.array([[1, 2], [3, 4]] * 3)
        labels = np.array([0, 0, 0, 1, 1, 2])
        train_X, valid_X, train_y, valid_y = split_data(features, labels)

        # Training part: four samples with two features each.
        self.assertEqual(train_X.shape, (4, 2))
        self.assertEqual(train_y.shape, (4, ))
        # Validation part: the remaining two samples.
        self.assertEqual(valid_X.shape, (2, 2))
        self.assertEqual(valid_y.shape, (2, ))

        # Exact label content of both parts.
        expected_valid = [0, 0]
        expected_train = [2, 0, 1, 1]
        self.assertListEqual(list(valid_y), expected_valid)
        self.assertListEqual(list(train_y), expected_train)
Exemplo n.º 7
0
    def _split_regular(self):
        """Split a six-sample fixture and verify shapes and label order.

        The fixture has labels ``[0, 0, 0, 1, 1, 2]``.  The expected result
        is a 4/2 split with validation labels ``[0, 0]`` and training labels
        ``[2, 0, 1, 1]``.
        """
        X = np.array([[1, 2], [3, 4], [1, 2], [3, 4], [1, 2], [3, 4]])
        y = np.array([0, 0, 0, 1, 1, 2])
        parts = split_data(X, y)
        X_tr, X_val, y_tr, y_val = parts

        # Shape checks, in the order train X, train y, valid X, valid y.
        expected_shapes = (
            (X_tr, (4, 2)),
            (y_tr, (4, )),
            (X_val, (2, 2)),
            (y_val, (2, )),
        )
        for array, shape in expected_shapes:
            self.assertEqual(array.shape, shape)

        # Label content checks.
        self.assertListEqual(list(y_val), [0, 0])
        self.assertListEqual(list(y_tr), [2, 0, 1, 1])
Exemplo n.º 8
0
    def _stratify(self):
        """Check ``split_data`` on six samples labelled 0, 0, 0, 0, 1, 1.

        Expects four training rows, two validation rows, two feature
        columns, validation labels ``[1, 0]`` and training labels
        ``[0, 0, 0, 1]``.
        """
        features = np.array([[1, 2], [3, 4]] * 3)
        labels = np.array([0, 0, 0, 0, 1, 1])
        train_X, valid_X, train_y, valid_y = split_data(features, labels)

        # Per part: row count, column count, then label count.
        for part_X, part_y, n_rows in ((train_X, train_y, 4),
                                       (valid_X, valid_y, 2)):
            self.assertEqual(part_X.shape[0], n_rows)
            self.assertEqual(part_X.shape[1], 2)
            self.assertEqual(part_y.shape[0], n_rows)

        # Exact label content of both parts.
        self.assertListEqual(list(valid_y), [1, 0])
        self.assertListEqual(list(train_y), [0, 0, 0, 1])
Exemplo n.º 9
0
    def _stratify(self):
        """Split a fixture with four 0-labels and two 1-labels and verify it.

        The split must yield four training and two validation samples with
        two features each, and the label order of both parts is pinned.
        """
        X = np.array([[1, 2], [3, 4], [1, 2], [3, 4], [1, 2], [3, 4]])
        y = np.array([0, 0, 0, 0, 1, 1])
        result = split_data(X, y)
        X_tr, X_val, y_tr, y_val = result

        # Training part dimensions.
        train_shape = X_tr.shape
        self.assertEqual(train_shape[0], 4)
        self.assertEqual(train_shape[1], 2)
        self.assertEqual(y_tr.shape[0], 4)

        # Validation part dimensions.
        valid_shape = X_val.shape
        self.assertEqual(valid_shape[0], 2)
        self.assertEqual(valid_shape[1], 2)
        self.assertEqual(y_val.shape[0], 2)

        # Label content.
        self.assertListEqual(list(y_val), [1, 0])
        self.assertListEqual(list(y_tr), [0, 0, 0, 1])
Exemplo n.º 10
0
    def test_split_data_regression(self):
        """Split 1000 random regression samples and check sizes and values.

        The global NumPy RNG is seeded with 42 before the data is drawn.
        The split is expected to put 670 rows into the training part and
        330 into the validation part, and two individual feature values
        are pinned.
        """
        n_points = 1000
        np.random.seed(42)
        # Draw order matters for the pinned values below:
        # column count first, then features, then targets.
        n_dims = np.random.randint(1, 100)
        features = np.random.rand(n_points, n_dims)
        targets = np.random.rand(n_points)

        train_X, valid_X, train_y, valid_y = split_data(features, targets)

        # Row counts, in the order train X, valid X, train y, valid y.
        for part, n_rows in ((train_X, 670), (valid_X, 330),
                             (train_y, 670), (valid_y, 330)):
            self.assertEqual(part.shape[0], n_rows)
        # Column counts of the feature matrices.
        for part in (train_X, valid_X):
            self.assertEqual(part.shape[1], n_dims)

        # Spot checks of individual entries.
        self.assertAlmostEqual(train_X[4, 2], 0.5986584841970366)
        self.assertAlmostEqual(valid_X[4, 2], 0.63911512838980322)
Exemplo n.º 11
0
    def test_split_data_regression(self):
        """Verify ``split_data`` on random continuous targets.

        With the global NumPy seed fixed to 42, 1000 samples are generated
        and split.  The test pins a 670/330 row split, an unchanged number
        of feature columns, and two individual feature values.
        """
        n_points = 1000
        expected_train_rows = 670
        expected_valid_rows = 330

        np.random.seed(42)
        n_dims = np.random.randint(1, 100)
        X = np.random.rand(n_points, n_dims)
        y = np.random.rand(n_points)

        parts = split_data(X, y)
        X_tr, X_val, y_tr, y_val = parts

        # Row counts of every returned array.
        self.assertEqual(X_tr.shape[0], expected_train_rows)
        self.assertEqual(X_val.shape[0], expected_valid_rows)
        self.assertEqual(y_tr.shape[0], expected_train_rows)
        self.assertEqual(y_val.shape[0], expected_valid_rows)
        # The number of feature columns is unchanged by the split.
        self.assertEqual(X_tr.shape[1], n_dims)
        self.assertEqual(X_val.shape[1], n_dims)

        # Spot checks of individual entries.
        self.assertAlmostEqual(X_tr[4, 2], 0.5986584841970366)
        self.assertAlmostEqual(X_val[4, 2], 0.63911512838980322)
Exemplo n.º 12
0
    def __init__(self, datamanager, output_dir,
                 configuration=None,
                 with_predictions=False,
                 all_scoring_functions=False,
                 seed=1,
                 output_y_test=False,
                 num_run=None):
        """Initialise the evaluator and create the holdout split.

        All arguments are forwarded unchanged to the parent initialiser.
        The training features and targets from ``datamanager.data`` are then
        passed to ``split_data``, and its four return values are stored as
        ``X_train``, ``X_optimization``, ``Y_train`` and ``Y_optimization``.
        """
        positional = (datamanager, output_dir, configuration)
        keyword = dict(
            with_predictions=with_predictions,
            all_scoring_functions=all_scoring_functions,
            seed=seed,
            output_y_test=output_y_test,
            num_run=num_run,
        )
        super(HoldoutEvaluator, self).__init__(*positional, **keyword)

        # Membership in CLASSIFICATION_TASKS sets the classification flag.
        task_is_classification = \
            datamanager.info['task'] in CLASSIFICATION_TASKS
        holdout = split_data(datamanager.data['X_train'],
                             datamanager.data['Y_train'],
                             classification=task_is_classification)
        self.X_train, self.X_optimization, self.Y_train, self.Y_optimization = \
            holdout