def test_decide_dataframe_chunks():
    """Validate the splits returned by ``decide_dataframe_chunk_sizes``.

    The function is called for a ``(10, 5)`` shape under two
    ``chunk_store_limit`` settings with several chunk size arguments
    (``None``, a dict and tuples).  For every call, each split size must be
    an ``Integral`` and the splits of each axis must sum to that axis'
    extent in ``shape``.
    """
    shape = (10, 5)
    # Each entry: (chunk_store_limit, chunk size arguments tried under it).
    scenarios = (
        (64, (None, {0: 4}, (2, 3), (10, 3))),
        (20, (None, {1: 3}, (2, 3), (10, 3))),
    )

    with option_context() as options:
        memory_usage = pd.Series([8, 22.2, 4, 2, 11.2], index=list('abcde'))

        for limit, chunk_sizes in scenarios:
            options.chunk_store_limit = limit

            for chunk_size in chunk_sizes:
                nsplit = decide_dataframe_chunk_sizes(
                    shape, chunk_size, memory_usage)

                # Split sizes have to be integer-like, never floats.
                for axis_splits in nsplit:
                    assert all(
                        isinstance(size, Integral) for size in axis_splits
                    ) is True

                # Splits along each axis must cover the axis exactly.
                covered = tuple(sum(axis_splits) for axis_splits in nsplit)
                assert shape == covered
def testDecideDataFrameChunks(self):
    """Validate the splits returned by ``decide_dataframe_chunk_sizes``.

    The function is called for a ``(10, 5)`` shape under two
    ``chunk_store_limit`` settings with several chunk size arguments
    (``None``, a dict and tuples).  For every call, each split size must be
    an ``Integral`` and the splits of each axis must sum to that axis'
    extent in ``shape``.
    """
    shape = (10, 5)
    # Each entry: (chunk_store_limit, chunk size arguments tried under it).
    scenarios = (
        (64, (None, {0: 4}, (2, 3), (10, 3))),
        (20, (None, {1: 3}, (2, 3), (10, 3))),
    )

    with option_context() as options:
        options.chunk_store_limit = 64
        memory_usage = pd.Series([8, 22.2, 4, 2, 11.2], index=list('abcde'))

        for limit, chunk_sizes in scenarios:
            options.chunk_store_limit = limit

            for chunk_size in chunk_sizes:
                nsplit = decide_dataframe_chunk_sizes(
                    shape, chunk_size, memory_usage)

                # Plain loop instead of a list comprehension: the assertion
                # is run for its side effect only, no list is needed.
                for ns in nsplit:
                    self.assertTrue(all(isinstance(i, Integral) for i in ns))

                # Splits along each axis must cover the axis exactly.
                self.assertEqual(shape, tuple(sum(ns) for ns in nsplit))