Пример #1
0
def test_decide_dataframe_chunks():
    """Check the splits returned by ``decide_dataframe_chunk_sizes``.

    For a fixed ``shape`` and ``memory_usage`` the function is called under
    two ``chunk_store_limit`` settings with several values for its second
    argument (``None``, a dict keyed by axis index, and 2-tuples).  For every
    call the returned ``nsplit`` must satisfy:

    * every size in every per-axis split is an ``Integral``;
    * the sizes of each per-axis split sum to the matching entry of ``shape``.
    """
    shape = (10, 5)

    # (chunk_store_limit, second-argument values tried under that limit).
    # The order matches the sequence in which the calls must be issued.
    cases = (
        (64, (None, {0: 4}, (2, 3), (10, 3))),
        (20, (None, {1: 3}, (2, 3), (10, 3))),
    )

    with option_context() as options:
        # Labelled 'a'..'e'; deliberately kept as written, including the
        # non-integer entries (22.2, 11.2).
        memory_usage = pd.Series([8, 22.2, 4, 2, 11.2], index=list('abcde'))

        for limit, chunk_sizes in cases:
            options.chunk_store_limit = limit

            for chunk_size in chunk_sizes:
                nsplit = decide_dataframe_chunk_sizes(shape, chunk_size, memory_usage)

                # Identify the failing combination, since all cases now share
                # the same assertion lines.
                case = 'chunk_store_limit=%r, chunk_size=%r' % (limit, chunk_size)

                for ns in nsplit:
                    assert all(isinstance(i, Integral) for i in ns), \
                        'non-integral chunk size for ' + case
                assert shape == tuple(sum(ns) for ns in nsplit), \
                    'chunk sizes do not add up to shape for ' + case
Пример #2
0
    def testDecideDataFrameChunks(self):
        """Check the splits returned by ``decide_dataframe_chunk_sizes``.

        For a fixed ``shape`` and ``memory_usage`` the function is called
        under two ``chunk_store_limit`` settings with several values for its
        second argument (``None``, a dict keyed by axis index, and 2-tuples).
        For every call the returned ``nsplit`` must satisfy:

        * every size in every per-axis split is an ``Integral``;
        * the sizes of each per-axis split sum to the matching entry of
          ``shape``.
        """
        shape = (10, 5)

        # (chunk_store_limit, second-argument values tried under that limit).
        # The order matches the sequence in which the calls must be issued.
        cases = (
            (64, (None, {0: 4}, (2, 3), (10, 3))),
            (20, (None, {1: 3}, (2, 3), (10, 3))),
        )

        with option_context() as options:
            # Labelled 'a'..'e'; deliberately kept as written, including the
            # non-integer entries (22.2, 11.2).
            memory_usage = pd.Series([8, 22.2, 4, 2, 11.2], index=list('abcde'))

            for limit, chunk_sizes in cases:
                options.chunk_store_limit = limit

                for chunk_size in chunk_sizes:
                    nsplit = decide_dataframe_chunk_sizes(shape, chunk_size, memory_usage)

                    # Identify the failing combination, since all cases now
                    # share the same assertion lines.
                    case = 'chunk_store_limit=%r, chunk_size=%r' % (limit, chunk_size)

                    # Plain loop rather than a list comprehension: the
                    # assertions are run for their side effect only.
                    for ns in nsplit:
                        self.assertTrue(all(isinstance(i, Integral) for i in ns), msg=case)
                    self.assertEqual(shape, tuple(sum(ns) for ns in nsplit), msg=case)