Exemplo n.º 1
0
    def test_chunk_size(self):
        """
        Test chunk_size.

        Only ``chunk_size`` is supplied; ``iterable_len``, ``n_splits`` and ``n_jobs`` are all None. The value returned
        by get_n_chunks is compared against hard-coded expectations for ``self.test_data`` and
        ``self.test_data_numpy``.
        """
        # Expectations for self.test_data: 13 chunks of size 1, 5 chunks of size 3
        for chunk_size, expected_n_chunks in [(1, 13), (3, 5)]:
            with self.subTest(input='list',
                              chunk_size=chunk_size,
                              expected_n_chunks=expected_n_chunks):
                self.assertEqual(
                    get_n_chunks(self.test_data,
                                 iterable_len=None,
                                 chunk_size=chunk_size,
                                 n_splits=None,
                                 n_jobs=None), expected_n_chunks)

        # Expectations for self.test_data_numpy: 100 chunks of size 1, 34 chunks of size 3
        for chunk_size, expected_n_chunks in [(1, 100), (3, 34)]:
            # Label the sub-test 'numpy' (not 'list') so a failure report points at the input that was actually used
            with self.subTest(input='numpy',
                              chunk_size=chunk_size,
                              expected_n_chunks=expected_n_chunks):
                self.assertEqual(
                    get_n_chunks(self.test_data_numpy,
                                 iterable_len=None,
                                 chunk_size=chunk_size,
                                 n_splits=None,
                                 n_jobs=None), expected_n_chunks)
Exemplo n.º 2
0
    def test_chunk_size_priority_over_n_splits(self):
        """
        When both chunk_size and n_splits are supplied, the expected number of chunks follows chunk_size
        """
        # (sub-test label, input data, chunk_size, n_splits, expected number of chunks)
        cases = [
            ('list', self.test_data, 1, 6, 13),
            ('numpy', self.test_data_numpy, 1, 6, 100),
            ('list', self.test_data, 3, 3, 5),
            ('numpy', self.test_data_numpy, 3, 3, 34),
        ]
        for label, data, chunk_size, n_splits, expected in cases:
            with self.subTest(input=label, chunk_size=chunk_size, n_splits=n_splits):
                n_chunks = get_n_chunks(data,
                                        iterable_len=None,
                                        chunk_size=chunk_size,
                                        n_splits=n_splits,
                                        n_jobs=None)
                self.assertEqual(n_chunks, expected)
Exemplo n.º 3
0
 def test_larger_iterable_len(self):
     """
     Test iterable_len, where iterable_len > len(input). For the list and numpy inputs the expected upper bound is
     13 and 100 respectively, not the supplied iterable_len. For an iterator input the supplied iterable_len (25)
     is the expected upper bound. In every case the result is additionally bounded by cpu_count() * 4.
     """
     # (sub-test label, input data, iterable_len passed in, expected upper bound on the number of chunks)
     cases = [
         ('list', self.test_data, 25, 13),
         ('numpy', self.test_data_numpy, 125, 100),
         ('generator/iterator', iter(self.test_data), 25, 25),
     ]
     for label, data, iterable_len, upper_bound in cases:
         with self.subTest(input=label):
             n_chunks = get_n_chunks(data,
                                     iterable_len=iterable_len,
                                     chunk_size=None,
                                     n_splits=None,
                                     n_jobs=None)
             self.assertEqual(n_chunks, min(upper_bound, cpu_count() * 4))
Exemplo n.º 4
0
 def test_smaller_iterable_len(self):
     """
     Test iterable_len, where iterable_len < len(input). With iterable_len=5 the expected result is
     min(5, cpu_count() * 4) for the list, numpy and iterator inputs alike.
     """
     # (sub-test label, input data); the iterator is created here and consumed by exactly one sub-test
     cases = [
         ('list', self.test_data),
         ('numpy', self.test_data_numpy),
         ('generator/iterator', iter(self.test_data)),
     ]
     for label, data in cases:
         with self.subTest(input=label):
             n_chunks = get_n_chunks(data,
                                     iterable_len=5,
                                     chunk_size=None,
                                     n_splits=None,
                                     n_jobs=None)
             self.assertEqual(n_chunks, min(5, cpu_count() * 4))
Exemplo n.º 5
0
 def test_generator_input_with_no_iterable_len_raises(self):
     """
     An iterator input combined with iterable_len=None should raise a ValueError, for every combination of
     chunk_size, n_splits and n_jobs taken from {None, 1, 3}
     """
     options = [None, 1, 3]
     for chunk_size, n_splits, n_jobs in product(options, repeat=3):
         with self.subTest(chunk_size=chunk_size, n_splits=n_splits, n_jobs=n_jobs):
             with self.assertRaises(ValueError):
                 get_n_chunks(iter(self.test_data),
                              iterable_len=None,
                              chunk_size=chunk_size,
                              n_splits=n_splits,
                              n_jobs=n_jobs)
Exemplo n.º 6
0
    def test_n_splits(self):
        """
        Test n_splits. The expected result equals n_splits for every n_jobs value tried, i.e. n_jobs should not
        influence the outcome
        """
        for n_splits, n_jobs in product([1, 6], [None, 2, 8]):
            # Same check for both input kinds; list first, then numpy
            for label, data in (('list', self.test_data), ('numpy', self.test_data_numpy)):
                with self.subTest(input=label, n_splits=n_splits, n_jobs=n_jobs):
                    n_chunks = get_n_chunks(data,
                                            iterable_len=None,
                                            chunk_size=None,
                                            n_splits=n_splits,
                                            n_jobs=n_jobs)
                    self.assertEqual(n_chunks, n_splits)
Exemplo n.º 7
0
 def test_everything_none(self):
     """
     With iterable_len, chunk_size, n_splits and n_jobs all None, the expected number of chunks is cpu_count() * 4,
     bounded by 13 for the list input and by 100 for the numpy input
     """
     # (sub-test label, input data, expected upper bound on the number of chunks)
     cases = [
         ('list', self.test_data, 13),
         ('numpy', self.test_data_numpy, 100),
     ]
     for label, data, upper_bound in cases:
         with self.subTest(input=label):
             n_chunks = get_n_chunks(data,
                                     iterable_len=None,
                                     chunk_size=None,
                                     n_splits=None,
                                     n_jobs=None)
             self.assertEqual(n_chunks, min(upper_bound, cpu_count() * 4))
Exemplo n.º 8
0
    def test_n_jobs(self):
        """
        With only n_jobs supplied (everything else None), the expected number of chunks is n_jobs * 4, bounded by the
        length of the input
        """
        for n_jobs in [1, 6]:
            # Same check for both input kinds; list first, then numpy
            for label, data in (('list', self.test_data), ('numpy', self.test_data_numpy)):
                with self.subTest(input=label, n_jobs=n_jobs):
                    n_chunks = get_n_chunks(data,
                                            iterable_len=None,
                                            chunk_size=None,
                                            n_splits=None,
                                            n_jobs=n_jobs)
                    self.assertEqual(n_chunks, min(4 * n_jobs, len(data)))