def test_results(self):
    """Check the length of ``res.predicted[0]`` after a shuffle-split run.

    ShuffleSplit is evaluated on ``self.random_table`` with a single
    NaiveBayesLearner, a training share of 0.6 and 10 resamples. The
    expected length is ``n_resamples * self.nrows * (1 - train_size)``.
    """
    table = self.random_table
    n_resamples = 10
    train_size = 0.6
    res = ShuffleSplit(
        table,
        [NaiveBayesLearner()],
        n_resamples=n_resamples,
        train_size=train_size,
        test_size=1 - train_size,
    )
    n_predictions = len(res.predicted[0])
    self.assertEqual(n_predictions,
                     n_resamples * self.nrows * (1 - train_size))
def main():  # pragma: no cover
    """Run a WidgetPreview of OWVennDiagram on tables made from "brown-selected".

    The ``if`` below is a hand-edited switch between two demo inputs. As
    written, ``not "test_rows"`` is always false, so the ``else`` branch
    runs: it passes two tables, one restricted to the first 15 attributes
    and one to the attributes from index 10 on, both keeping the class
    variable. Removing the ``not`` selects the first branch instead, which
    appends a StringVariable named "Test" and passes five row samples
    taken from ShuffleSplit indices.
    """
    # pylint: disable=import-outside-toplevel
    from Orange.evaluation import ShuffleSplit

    data = Table("brown-selected")
    if not "test_rows":  # constant false; drop the `not` to preview row samples
        labels = (np.arange(len(data)).reshape(-1, 1) % 30).astype(str)
        data = append_column(data, "M", StringVariable("Test"), labels)
        splitter = ShuffleSplit(n_resamples=5, test_size=0.7, stratified=False)
        splits = iter(splitter.get_indices(data))
        datasets = []
        for key in range(1, 6):
            rows, _ = next(splits)
            subset = data[rows]
            # Keys 1..5 give the names "B".."F".
            subset.name = chr(ord("A") + key)
            datasets.append((subset, key))
    else:
        attributes = data.domain.attributes
        class_var = data.domain.class_var
        column_ranges = (slice(None, 15), slice(10, None))
        datasets = [
            (data.transform(Domain(attributes[part], class_var)), key)
            for key, part in enumerate(column_ranges, start=1)
        ]

    WidgetPreview(OWVennDiagram).run(setData=datasets)
def test_init(self):
    """Verify that ShuffleSplit exposes its constructor arguments as attributes."""
    settings = {
        "n_resamples": 1,
        "train_size": 0.1,
        "test_size": 0.2,
        "random_state": 42,
    }
    res = ShuffleSplit(stratified=False, **settings)

    for attr, expected in settings.items():
        self.assertEqual(getattr(res, attr), expected)
    # Checked separately: only falsiness of `stratified` is asserted.
    self.assertFalse(res.stratified)
def test_results(self):
    """Check the length of ``res.predicted[0]`` on a 100 x 10 random table.

    ShuffleSplit is run with a single NaiveBayesLearner, a training share
    of 0.6 and 10 resamples. The expected length is
    ``n_resamples * nrows * (1 - train_size)``.
    """
    n_rows = 100
    n_cols = 10
    table = random_data(n_rows, n_cols)

    n_resamples = 10
    train_size = 0.6
    res = ShuffleSplit(
        table,
        [NaiveBayesLearner()],
        n_resamples=n_resamples,
        train_size=train_size,
        test_size=1 - train_size,
    )
    n_predictions = len(res.predicted[0])
    self.assertEqual(n_predictions,
                     n_resamples * n_rows * (1 - train_size))
def test_stratified(self):
    """Check that a stratified 50/50 split draws evenly from the strata.

    For every fold, exactly 25 of the fold's row indices must be below 50
    and exactly 50 must be below 100.
    """
    # Strata size; presumably each class of self.iris spans 50 consecutive
    # rows -- confirm against the fixture.
    stratum = 50
    splitter = ShuffleSplit(
        n_resamples=3,
        train_size=.5,
        test_size=.5,
        stratified=True,
        random_state=0,
    )
    res = splitter(self.iris, [NaiveBayesLearner()])

    for fold in res.folds:
        rows = res.row_indices[fold]
        self.assertEqual(np.count_nonzero(rows < stratum), stratum // 2)
        self.assertEqual(np.count_nonzero(rows < 2 * stratum), stratum)
def test_not_stratified(self):
    """Check that a non-stratified split is not perfectly balanced.

    For each (train, test) pair in ``res.indices`` two flags are recorded:
    whether exactly half a stratum of the train indices is below 50, and
    whether exactly one stratum of them is below 100. The test passes when
    at least one flag is false.
    """
    stratum = 50  # strata size
    res = ShuffleSplit(
        self.iris,
        [NaiveBayesLearner()],
        n_resamples=3,
        train_size=.5,
        test_size=.5,
        stratified=False,
        random_state=0,
    )

    balanced = []
    for train, _ in res.indices:
        balanced.extend((
            np.count_nonzero(train < stratum) == stratum / 2,
            np.count_nonzero(train < 2 * stratum) == stratum,
        ))
    self.assertFalse(all(balanced))
def test_not_stratified(self):
    """Check that a non-stratified split is not perfectly balanced.

    For each fold two flags are recorded: whether exactly 25 of the fold's
    row indices are below 50, and whether exactly 50 are below 100. The
    test passes when at least one flag is false.
    """
    stratum = 50  # strata size
    splitter = ShuffleSplit(
        n_resamples=3,
        train_size=.5,
        test_size=.5,
        stratified=False,
        random_state=0,
    )
    res = splitter(self.iris, [NaiveBayesLearner()])

    # (upper row bound, count expected under perfect stratification)
    thresholds = ((stratum, stratum // 2), (2 * stratum, stratum))
    balanced = [
        np.count_nonzero(res.row_indices[fold] < bound) == expected
        for fold in res.folds
        for bound, expected in thresholds
    ]
    self.assertFalse(all(balanced))