Пример #1
0
    def testReadCSV(self):
        """Check ``read_csv`` metadata before and after tiling a small CSV.

        A 3x3 ``int64`` pandas DataFrame is written to a temporary CSV file
        and read back with ``read_csv(..., index_col=0, chunk_bytes=10)``.
        The test verifies that:

        * the resulting op is a ``DataFrameReadCSV`` and the frame reports
          three columns whose labels match the pandas frame;
        * after ``tiles()`` the frame has four chunks, each carrying the
          same column labels and dtypes as the pandas frame;
        * the chunks do not all share a single ``index_value`` key.
        """
        df = pd.DataFrame(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),
                          columns=['a', 'b', 'c'],
                          dtype=np.int64)

        # The context manager removes the directory and the CSV inside it
        # on exit, including when one of the assertions below fails.
        with tempfile.TemporaryDirectory() as tempdir:
            file_path = os.path.join(tempdir, 'test.csv')
            df.to_csv(file_path)

            # index_col=0 maps the index column written by to_csv back to
            # the index, leaving the three data columns.
            # NOTE(review): chunk_bytes=10 presumably forces the file to be
            # split into several chunks -- confirm against read_csv.
            mdf = read_csv(file_path, index_col=0, chunk_bytes=10)
            self.assertIsInstance(mdf.op, DataFrameReadCSV)
            self.assertEqual(mdf.shape[1], 3)
            pd.testing.assert_index_equal(df.columns,
                                          mdf.columns_value.to_pandas())

            mdf = mdf.tiles()
            self.assertEqual(len(mdf.chunks), 4)

            # Collect the distinct index keys while checking each chunk's
            # column labels and dtypes.
            index_keys = set()
            for chunk in mdf.chunks:
                index_keys.add(chunk.index_value.key)
                pd.testing.assert_index_equal(df.columns,
                                              chunk.columns_value.to_pandas())
                pd.testing.assert_series_equal(df.dtypes, chunk.dtypes)

            # At least two chunks must have different index keys.
            self.assertGreater(len(index_keys), 1)
Пример #2
0
def test_read_csv():
    """Check ``read_csv`` metadata before and after tiling a small CSV.

    A 3x3 ``int64`` pandas DataFrame is written to a temporary CSV file and
    read back with ``read_csv(..., index_col=0, chunk_bytes=10)``. The test
    verifies that:

    * the resulting op is a ``DataFrameReadCSV`` and the frame reports
      three columns whose labels match the pandas frame;
    * after ``tile(mdf)`` the frame has four chunks, each carrying the same
      column labels and dtypes as the pandas frame;
    * the chunks do not all share a single ``index_value`` key.
    """
    df = pd.DataFrame(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),
                      columns=['a', 'b', 'c'],
                      dtype=np.int64)

    # The context manager removes the directory and the CSV inside it on
    # exit, including when one of the assertions below fails.
    with tempfile.TemporaryDirectory() as tempdir:
        file_path = os.path.join(tempdir, 'test.csv')
        df.to_csv(file_path)

        # index_col=0 maps the index column written by to_csv back to the
        # index, leaving the three data columns.
        # NOTE(review): chunk_bytes=10 presumably forces the file to be
        # split into several chunks -- confirm against read_csv.
        mdf = read_csv(file_path, index_col=0, chunk_bytes=10)
        assert isinstance(mdf.op, DataFrameReadCSV)
        assert mdf.shape[1] == 3
        pd.testing.assert_index_equal(df.columns,
                                      mdf.columns_value.to_pandas())

        mdf = tile(mdf)
        assert len(mdf.chunks) == 4

        # Collect the distinct index keys while checking each chunk's
        # column labels and dtypes.
        index_keys = set()
        for chunk in mdf.chunks:
            index_keys.add(chunk.index_value.key)
            pd.testing.assert_index_equal(df.columns,
                                          chunk.columns_value.to_pandas())
            pd.testing.assert_series_equal(df.dtypes, chunk.dtypes)

        # At least two chunks must have different index keys.
        assert len(index_keys) > 1