Пример #1
0
    def test_to_latex_filename(self, frame):
        # Round-trip check: to_latex(path) must write exactly the same
        # text that the in-memory to_latex() call returns.
        with tm.ensure_clean('test.tex') as path:
            frame.to_latex(path)

            with open(path, 'r') as f:
                assert frame.to_latex() == f.read()

        # test with utf-8 and encoding option (GH 7061)
        df = DataFrame([[u'au\xdfgangen']])  # contains non-ASCII sharp-s
        with tm.ensure_clean('test.tex') as path:
            df.to_latex(path, encoding='utf-8')
            with codecs.open(path, 'r', encoding='utf-8') as f:
                assert df.to_latex() == f.read()

        # test with utf-8 without encoding option
        if compat.PY3:  # python3: pandas default encoding is utf-8
            with tm.ensure_clean('test.tex') as path:
                df.to_latex(path)
                with codecs.open(path, 'r', encoding='utf-8') as f:
                    assert df.to_latex() == f.read()
        else:
            # python2 default encoding is ascii, so an error should be raised
            with tm.ensure_clean('test.tex') as path:
                with pytest.raises(UnicodeEncodeError):
                    df.to_latex(path)
Пример #2
0
    def test_xz(self):
        """Round-trip a CSV through an xz archive: explicit compression
        argument, an open binary handle, and extension-based inference."""
        lzma = tm._skip_if_no_lzma()

        # Load the raw bytes and the expected parsed frame once.
        with open(self.csv1, "rb") as source:
            raw = source.read()
            expected = self.read_csv(self.csv1)

        with tm.ensure_clean() as path:
            # Write the xz archive, letting the context manager close it.
            with lzma.LZMAFile(path, mode="wb") as archive:
                archive.write(raw)

            # Explicit compression, reading from a path ...
            tm.assert_frame_equal(self.read_csv(path, compression="xz"),
                                  expected)

            # ... and from an already-open binary handle.
            with open(path, "rb") as handle:
                tm.assert_frame_equal(
                    self.read_csv(handle, compression="xz"), expected)

        # Compression inferred from the ".xz" suffix.
        with tm.ensure_clean("test.xz") as path:
            with lzma.LZMAFile(path, mode="wb") as archive:
                archive.write(raw)
            tm.assert_frame_equal(self.read_csv(path, compression="infer"),
                                  expected)
Пример #3
0
    def test_to_csv_quotechar(self):
        """to_csv with quoting=QUOTE_ALL: default quote character, a
        custom '$' quote character, and TypeError for quotechar=None."""
        df = DataFrame({'col': [1, 2]})

        def written(csv_path):
            # Read back exactly what to_csv produced.
            with open(csv_path, 'r') as fh:
                return fh.read()

        with tm.ensure_clean('test.csv') as path:
            df.to_csv(path, quoting=1)  # 1=QUOTE_ALL
            assert written(path) == '"","col"\n"0","1"\n"1","2"\n'

        with tm.ensure_clean('test.csv') as path:
            df.to_csv(path, quoting=1, quotechar="$")
            assert written(path) == '$$,$col$\n$0$,$1$\n$1$,$2$\n'

        with tm.ensure_clean('test.csv') as path:
            # Quoting requested with no quote character must raise.
            with pytest.raises(TypeError, match='quotechar'):
                df.to_csv(path, quoting=1, quotechar=None)
Пример #4
0
    def test_gzip(self):
        """Read a gzip-compressed CSV, both by an explicit compression
        argument and by inferring it from a ".gz" extension."""
        try:
            import gzip
        except ImportError:
            raise nose.SkipTest("need gzip to run")

        # Fixture bytes and the expected parsed frame.
        with open(self.csv1, "rb") as source:
            raw = source.read()
            expected = self.read_csv(self.csv1)

        with tm.ensure_clean() as path:
            with gzip.GzipFile(path, mode="wb") as archive:
                archive.write(raw)

            # Explicit compression from a path ...
            tm.assert_frame_equal(self.read_csv(path, compression="gzip"),
                                  expected)

            # ... and from an open binary handle.
            with open(path, "rb") as handle:
                tm.assert_frame_equal(
                    self.read_csv(handle, compression="gzip"), expected)

        # Compression inferred from the ".gz" suffix.
        with tm.ensure_clean("test.gz") as path:
            with gzip.GzipFile(path, mode="wb") as archive:
                archive.write(raw)
            tm.assert_frame_equal(self.read_csv(path, compression="infer"),
                                  expected)
Пример #5
0
    def test_to_csv_line_terminators(self):
        """Line terminators in to_csv output: explicit CRLF, explicit LF,
        and the os.linesep default (see gh-20353, gh-21406)."""
        df = DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]},
                       index=['one', 'two', 'three'])

        def written_bytes(csv_path):
            # Read back in binary so terminators stay visible.
            with open(csv_path, mode='rb') as fh:
                return fh.read()

        with ensure_clean() as path:
            # case 1: CRLF as line terminator
            df.to_csv(path, line_terminator='\r\n')
            assert written_bytes(path) == (
                b',A,B\r\none,1,4\r\ntwo,2,5\r\nthree,3,6\r\n')

        with ensure_clean() as path:
            # case 2: LF as line terminator
            df.to_csv(path, line_terminator='\n')
            assert written_bytes(path) == (
                b',A,B\none,1,4\ntwo,2,5\nthree,3,6\n')

        with ensure_clean() as path:
            # case 3: The default line terminator(=os.linesep)(gh-21406)
            df.to_csv(path)
            sep = os.linesep.encode('utf-8')
            rows = [b',A,B', b'one,1,4', b'two,2,5', b'three,3,6']
            assert written_bytes(path) == sep.join(rows) + sep
Пример #6
0
    def test_bz2(self):
        """Read a bz2-compressed CSV by explicit compression, from an open
        file handle, and by inference from a ".bz2" extension."""
        try:
            import bz2
        except ImportError:
            raise nose.SkipTest("need bz2 to run")

        with open(self.csv1, "rb") as data_file:
            data = data_file.read()
            expected = self.read_csv(self.csv1)

        with tm.ensure_clean() as path:
            # Build the compressed fixture by hand.
            tmp = bz2.BZ2File(path, mode="wb")
            tmp.write(data)
            tmp.close()

            result = self.read_csv(path, compression="bz2")
            tm.assert_frame_equal(result, expected)

            # Unknown compression names must raise.
            self.assertRaises(ValueError, self.read_csv, path, compression="bz3")

            with open(path, "rb") as fin:
                result = self.read_csv(fin, compression="bz2")
                tm.assert_frame_equal(result, expected)

        # Compression inferred from the ".bz2" suffix.
        with tm.ensure_clean("test.bz2") as path:
            tmp = bz2.BZ2File(path, mode="wb")
            tmp.write(data)
            tmp.close()
            result = self.read_csv(path, compression="infer")
            tm.assert_frame_equal(result, expected)
Пример #7
0
    def test_read_infer(self, ext, get_random_path):
        """Write an uncompressed pickle, recompress it externally, then
        read it back with compression inferred from the extension."""
        if ext == '.xz':
            tm._skip_if_no_lzma()

        base = get_random_path
        path1 = base + ".raw"
        path2 = base + ext
        # Reverse-map the extension to its compression name (None if no
        # codec uses this extension).
        compression = next(
            (name for name, extension
             in self._compression_to_extension.items()
             if extension == ext),
            None)

        with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2:
            df = tm.makeDataFrame()

            # write to uncompressed file
            df.to_pickle(p1, compression=None)

            # compress
            self.compress_file(p1, p2, compression=compression)

            # read compressed file by inferred compression method
            df2 = pd.read_pickle(p2)

            tm.assert_frame_equal(df, df2)
Пример #8
0
    def test_write_infer(self, ext, get_random_path):
        """Write a pickle with compression inferred from the extension,
        decompress it, and verify the round trip."""
        base = get_random_path
        path1 = base + ext
        path2 = base + ".raw"
        # Reverse-map the extension to its compression name.
        compression = next(
            (name for name, extension
             in self._compression_to_extension.items()
             if extension == ext),
            None)

        with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2:
            df = tm.makeDataFrame()

            # write to compressed file by inferred compression method
            df.to_pickle(p1)

            # decompress into the ".raw" companion file
            with tm.decompress_file(p1, compression=compression) as f:
                with open(p2, "wb") as fh:
                    fh.write(f.read())

            # read decompressed file
            df2 = pd.read_pickle(p2, compression=None)

            tm.assert_frame_equal(df, df2)
Пример #9
0
    def test_to_csv_quotechar(self):
        """to_csv with quoting=QUOTE_ALL: default '"' quotechar, a custom
        '$' quotechar, and a TypeError when quotechar is None."""
        df = DataFrame({'col': [1, 2]})
        expected = """\
"","col"
"0","1"
"1","2"
"""

        with tm.ensure_clean('test.csv') as path:
            df.to_csv(path, quoting=1)  # 1=QUOTE_ALL
            with open(path, 'r') as f:
                self.assertEqual(f.read(), expected)

        # Same frame, quoted with '$' instead of the default '"'.
        expected = """\
$$,$col$
$0$,$1$
$1$,$2$
"""

        with tm.ensure_clean('test.csv') as path:
            df.to_csv(path, quoting=1, quotechar="$")
            with open(path, 'r') as f:
                self.assertEqual(f.read(), expected)

        with tm.ensure_clean('test.csv') as path:
            # Quoting requested but no quote character supplied: must raise.
            with tm.assertRaisesRegexp(TypeError, 'quotechar'):
                df.to_csv(path, quoting=1, quotechar=None)
Пример #10
0
    def test_to_csv_with_single_column(self):
        # see gh-18676, https://bugs.python.org/issue32255
        #
        # Python's CSV library adds an extraneous '""'
        # before the newline when the NaN-value is in
        # the first row. Otherwise, only the newline
        # character is added. This behavior is inconsistent
        # and was patched in https://bugs.python.org/pull_request4672.
        df1 = DataFrame([None, 1])
        expected1 = """\
""
1.0
"""
        with tm.ensure_clean('test.csv') as path:
            df1.to_csv(path, header=None, index=None)
            with open(path, 'r') as f:
                assert f.read() == expected1

        df2 = DataFrame([1, None])
        expected2 = """\
1.0
""
"""
        with tm.ensure_clean('test.csv') as path:
            df2.to_csv(path, header=None, index=None)
            with open(path, 'r') as f:
                assert f.read() == expected2
Пример #11
0
    def test_other_compression(self, compress_type, compress_method, ext):
        """Parametrized compression round-trip: `compress_method` opens the
        archive writer, `compress_type` names the codec, and `ext` is the
        file suffix used for extension-based inference."""

        with open(self.csv1, 'rb') as data_file:
            data = data_file.read()
            expected = self.read_csv(self.csv1)

        with tm.ensure_clean() as path:
            with compress_method(path, mode='wb') as tmp:
                tmp.write(data)

            result = self.read_csv(path, compression=compress_type)
            tm.assert_frame_equal(result, expected)

            if compress_type == 'bz2':
                # Unknown codec names must raise (checked once, for bz2).
                pytest.raises(ValueError, self.read_csv,
                              path, compression='bz3')

            # Reading from an already-open binary handle.
            with open(path, 'rb') as fin:
                result = self.read_csv(fin, compression=compress_type)
                tm.assert_frame_equal(result, expected)

        # Compression inferred from the file extension.
        with tm.ensure_clean('test.{}'.format(ext)) as path:
            with compress_method(path, mode='wb') as tmp:
                tmp.write(data)
            result = self.read_csv(path, compression='infer')
            tm.assert_frame_equal(result, expected)
Пример #12
0
    def test_decompression_regex_sep(self):
        """Read gzip- and bz2-compressed CSVs that use a '::' separator
        (forces the regex separator code path)."""
        # see gh-6607

        try:
            import gzip
            import bz2
        except ImportError:
            pytest.skip('need gzip and bz2 to run')

        with open(self.csv1, 'rb') as f:
            data = f.read()
        # Turn the comma-separated fixture into a '::'-separated one.
        data = data.replace(b',', b'::')
        expected = self.read_csv(self.csv1)

        with tm.ensure_clean() as path:
            tmp = gzip.GzipFile(path, mode='wb')
            tmp.write(data)
            tmp.close()

            result = self.read_csv(path, sep='::', compression='gzip')
            tm.assert_frame_equal(result, expected)

        with tm.ensure_clean() as path:
            tmp = bz2.BZ2File(path, mode='wb')
            tmp.write(data)
            tmp.close()

            result = self.read_csv(path, sep='::', compression='bz2')
            tm.assert_frame_equal(result, expected)

            # Unknown compression names must raise.
            pytest.raises(ValueError, self.read_csv,
                          path, compression='bz3')
Пример #13
0
    def test_bz2(self):
        """Read a bz2-compressed CSV by explicit compression, from an open
        handle (Python 3 vs Python 2 C engine), and by extension inference.

        Bug fix: the original compared ``self.engine is not 'python'`` —
        identity against a string literal, which is implementation-defined
        (and a SyntaxWarning on modern CPython). Equality is intended.
        """
        try:
            import bz2
        except ImportError:
            raise nose.SkipTest('need bz2 to run')

        with open(self.csv1, 'rb') as data_file:
            data = data_file.read()
            expected = self.read_csv(self.csv1)

        with tm.ensure_clean() as path:
            # Build the compressed fixture by hand.
            tmp = bz2.BZ2File(path, mode='wb')
            tmp.write(data)
            tmp.close()

            result = self.read_csv(path, compression='bz2')
            tm.assert_frame_equal(result, expected)

            # Unknown compression names must raise.
            self.assertRaises(ValueError, self.read_csv,
                              path, compression='bz3')

            with open(path, 'rb') as fin:
                if compat.PY3:
                    result = self.read_csv(fin, compression='bz2')
                    tm.assert_frame_equal(result, expected)
                elif self.engine != 'python':
                    # Python 2 C engine cannot stream bz2 file objects.
                    self.assertRaises(ValueError, self.read_csv,
                                      fin, compression='bz2')

        # Compression inferred from the ".bz2" suffix.
        with tm.ensure_clean('test.bz2') as path:
            tmp = bz2.BZ2File(path, mode='wb')
            tmp.write(data)
            tmp.close()
            result = self.read_csv(path, compression='infer')
            tm.assert_frame_equal(result, expected)
Пример #14
0
    def test_decompression_regex_sep(self):
        """Read gzip- and bz2-compressed CSVs that use a '::' separator.

        See gh-6607.

        Bug fix: the fixture was read via ``open(...).read()`` without ever
        closing the handle; use a context manager so it is released.
        """
        try:
            import gzip
            import bz2
        except ImportError:
            raise nose.SkipTest("need gzip and bz2 to run")

        with open(self.csv1, "rb") as f:
            data = f.read()
        # Turn the comma-separated fixture into a '::'-separated one.
        data = data.replace(b",", b"::")
        expected = self.read_csv(self.csv1)

        with tm.ensure_clean() as path:
            tmp = gzip.GzipFile(path, mode="wb")
            tmp.write(data)
            tmp.close()

            result = self.read_csv(path, sep="::", compression="gzip")
            tm.assert_frame_equal(result, expected)

        with tm.ensure_clean() as path:
            tmp = bz2.BZ2File(path, mode="wb")
            tmp.write(data)
            tmp.close()

            result = self.read_csv(path, sep="::", compression="bz2")
            tm.assert_frame_equal(result, expected)

            # Unknown compression names must raise.
            self.assertRaises(ValueError, self.read_csv, path,
                              compression="bz3")
Пример #15
0
    def test_xz(self):
        """xz round-trip: explicit compression argument, open binary
        handle, and inference from a ".xz" extension."""
        lzma = compat.import_lzma()

        with open(self.csv1, 'rb') as source:
            raw = source.read()
            expected = self.read_csv(self.csv1)

        with tm.ensure_clean() as path:
            with lzma.LZMAFile(path, mode='wb') as archive:
                archive.write(raw)

            tm.assert_frame_equal(self.read_csv(path, compression='xz'),
                                  expected)

            with open(path, 'rb') as handle:
                tm.assert_frame_equal(
                    self.read_csv(handle, compression='xz'), expected)

        with tm.ensure_clean('test.xz') as path:
            with lzma.LZMAFile(path, mode='wb') as archive:
                archive.write(raw)
            tm.assert_frame_equal(self.read_csv(path, compression='infer'),
                                  expected)
Пример #16
0
    def test_gzip(self):
        """Read a gzip-compressed CSV by explicit compression, from an open
        handle, and by inference from a ".gz" extension."""
        import gzip

        with open(self.csv1, 'rb') as data_file:
            data = data_file.read()
            expected = self.read_csv(self.csv1)

        with tm.ensure_clean() as path:
            # Build the compressed fixture by hand.
            tmp = gzip.GzipFile(path, mode='wb')
            tmp.write(data)
            tmp.close()

            result = self.read_csv(path, compression='gzip')
            tm.assert_frame_equal(result, expected)

            with open(path, 'rb') as f:
                result = self.read_csv(f, compression='gzip')
                tm.assert_frame_equal(result, expected)

        # Compression inferred from the ".gz" suffix.
        with tm.ensure_clean('test.gz') as path:
            tmp = gzip.GzipFile(path, mode='wb')
            tmp.write(data)
            tmp.close()
            result = self.read_csv(path, compression='infer')
            tm.assert_frame_equal(result, expected)
Пример #17
0
    def test_bz2(self):
        """Read a bz2-compressed CSV by explicit compression, from an open
        handle, and by inference from a ".bz2" extension."""
        import bz2

        with open(self.csv1, 'rb') as data_file:
            data = data_file.read()
            expected = self.read_csv(self.csv1)

        with tm.ensure_clean() as path:
            # Build the compressed fixture by hand.
            tmp = bz2.BZ2File(path, mode='wb')
            tmp.write(data)
            tmp.close()

            result = self.read_csv(path, compression='bz2')
            tm.assert_frame_equal(result, expected)

            # Unknown compression names must raise.
            pytest.raises(ValueError, self.read_csv,
                          path, compression='bz3')

            with open(path, 'rb') as fin:
                result = self.read_csv(fin, compression='bz2')
                tm.assert_frame_equal(result, expected)

        # Compression inferred from the ".bz2" suffix.
        with tm.ensure_clean('test.bz2') as path:
            tmp = bz2.BZ2File(path, mode='wb')
            tmp.write(data)
            tmp.close()
            result = self.read_csv(path, compression='infer')
            tm.assert_frame_equal(result, expected)
Пример #18
0
    def test_roundtrip_indexlabels(self):
        """Excel round-trip with various `index_label` arguments: the
        written index name must survive write/parse."""
        _skip_if_no_xlrd()
        ext = self.ext
        path = '__tmp_to_excel_from_excel_indexlabels__.' + ext

        with ensure_clean(path) as path:

            # NOTE(review): chained assignment — presumably intended to
            # write NaN into self.frame; confirm under copy-on-write.
            self.frame['A'][:5] = nan

            # Exercise the main to_excel argument combinations.
            self.frame.to_excel(path, 'test1')
            self.frame.to_excel(path, 'test1', cols=['A', 'B'])
            self.frame.to_excel(path, 'test1', header=False)
            self.frame.to_excel(path, 'test1', index=False)

            # test index_label as a one-element list
            frame = (DataFrame(np.random.randn(10, 2)) >= 0)
            frame.to_excel(path, 'test1', index_label=['test'])
            reader = ExcelFile(path)
            recons = reader.parse('test1', index_col=0).astype(np.int64)
            frame.index.names = ['test']
            self.assertEqual(frame.index.names, recons.index.names)

            # index_label with more entries than index levels: the parsed
            # index is still expected to carry only ['test'].
            frame = (DataFrame(np.random.randn(10, 2)) >= 0)
            frame.to_excel(
                path, 'test1', index_label=['test', 'dummy', 'dummy2'])
            reader = ExcelFile(path)
            recons = reader.parse('test1', index_col=0).astype(np.int64)
            frame.index.names = ['test']
            self.assertEqual(frame.index.names, recons.index.names)

            # index_label as a bare string
            frame = (DataFrame(np.random.randn(10, 2)) >= 0)
            frame.to_excel(path, 'test1', index_label='test')
            reader = ExcelFile(path)
            recons = reader.parse('test1', index_col=0).astype(np.int64)
            frame.index.names = ['test']
            self.assertEqual(frame.index.names, recons.index.names)

        # test index_labels in same row as column names
        path = '%s.%s' % (tm.rands(10), ext)

        with ensure_clean(path) as path:

            self.frame.to_excel(path, 'test1',
                                cols=['A', 'B', 'C', 'D'], index=False)
            # take 'A' and 'B' as indexes (they are in same row as cols 'C',
            # 'D')
            df = self.frame.copy()
            df = df.set_index(['A', 'B'])

            reader = ExcelFile(path)
            recons = reader.parse('test1', index_col=[0, 1])
            tm.assert_frame_equal(df, recons)
Пример #19
0
    def test_to_csv_dups_cols(self):
        """Round-trip frames with duplicate column labels through CSV."""

        # 30 float columns labelled 0-14 twice: a single dtype round-trips
        # once the (mangled) duplicate labels are restored.
        df = DataFrame(np.random.randn(1000, 30), columns=lrange(
            15) + lrange(15), dtype='float64')

        with ensure_clean() as filename:
            df.to_csv(filename)  # single dtype, fine
            result = read_csv(filename, index_col=0)
            result.columns = df.columns
            assert_frame_equal(result, df)

        # Mixed dtypes, columns 0/1/2 repeated across five sub-frames.
        df_float = DataFrame(np.random.randn(1000, 3), dtype='float64')
        df_int = DataFrame(np.random.randn(1000, 3), dtype='int64')
        df_bool = DataFrame(True, index=df_float.index, columns=lrange(3))
        df_object = DataFrame('foo', index=df_float.index, columns=lrange(3))
        df_dt = DataFrame(Timestamp('20010101'),
                          index=df_float.index, columns=lrange(3))
        df = pd.concat([df_float, df_int, df_bool, df_object,
                        df_dt], axis=1, ignore_index=True)

        cols = []
        for i in range(5):
            cols.extend([0, 1, 2])
        df.columns = cols

        from pandas import to_datetime
        with ensure_clean() as filename:
            df.to_csv(filename)
            result = read_csv(filename, index_col=0)

            # date cols: '0.4'/'1.4'/'2.4' are the mangled names of the
            # fifth (datetime) block; parse them back to timestamps.
            for i in ['0.4', '1.4', '2.4']:
                result[i] = to_datetime(result[i])

            result.columns = df.columns
            assert_frame_equal(result, df)

        # GH3457
        from pandas.util.testing import makeCustomDataframe as mkdf

        N = 10
        df = mkdf(N, 3)
        df.columns = ['a', 'a', 'b']

        with ensure_clean() as filename:
            df.to_csv(filename)

            # read_csv will rename the dups columns
            result = read_csv(filename, index_col=0)
            result = result.rename(columns={'a.1': 'a'})
            assert_frame_equal(result, df)
Пример #20
0
    def test_zip(self):
        """Read zip-compressed CSVs; multi-member, empty, and corrupt
        archives must raise informative errors.

        Bug fix: the original tested ``self.engine is not 'python'`` —
        identity against a string literal, which is implementation-defined
        (and a SyntaxWarning on modern CPython). Equality is intended.
        """
        try:
            import zipfile
        except ImportError:
            raise nose.SkipTest('need zipfile to run')

        with open(self.csv1, 'rb') as data_file:
            data = data_file.read()
            expected = self.read_csv(self.csv1)

        with tm.ensure_clean('test_file.zip') as path:
            tmp = zipfile.ZipFile(path, mode='w')
            tmp.writestr('test_file', data)
            tmp.close()

            result = self.read_csv(path, compression='zip')
            tm.assert_frame_equal(result, expected)

            result = self.read_csv(path, compression='infer')
            tm.assert_frame_equal(result, expected)

            # Reading from an open handle (not supported by the Python
            # engine in this suite).
            if self.engine != 'python':
                with open(path, 'rb') as f:
                    result = self.read_csv(f, compression='zip')
                    tm.assert_frame_equal(result, expected)

        with tm.ensure_clean('combined_zip.zip') as path:
            inner_file_names = ['test_file', 'second_file']
            tmp = zipfile.ZipFile(path, mode='w')
            for file_name in inner_file_names:
                tmp.writestr(file_name, data)
            tmp.close()

            # Archives with more than one member are ambiguous.
            self.assertRaisesRegexp(ValueError, 'Multiple files',
                                    self.read_csv, path, compression='zip')

            self.assertRaisesRegexp(ValueError, 'Multiple files',
                                    self.read_csv, path, compression='infer')

        with tm.ensure_clean() as path:
            tmp = zipfile.ZipFile(path, mode='w')
            tmp.close()

            # An archive with no members cannot be read.
            self.assertRaisesRegexp(ValueError, 'Zero files',
                                    self.read_csv, path, compression='zip')

        with tm.ensure_clean() as path:
            with open(path, 'wb') as f:
                # An empty, non-zip file is a bad archive.
                self.assertRaises(zipfile.BadZipfile, self.read_csv,
                                  f, compression='zip')
Пример #21
0
def test_compression_size_fh(obj, method, compression_only):
    """Writing through an explicit file handle with compression must yield
    a smaller file than writing uncompressed; the user-supplied handle
    must stay open inside the `with` block and be closed on exit."""

    with tm.ensure_clean() as filename:
        with open(filename, 'w') as fh:
            getattr(obj, method)(fh, compression=compression_only)
            assert not fh.closed  # writer must not close the caller's handle
        assert fh.closed
        compressed = os.path.getsize(filename)
    with tm.ensure_clean() as filename:
        with open(filename, 'w') as fh:
            getattr(obj, method)(fh, compression=None)
            assert not fh.closed
        assert fh.closed
        uncompressed = os.path.getsize(filename)
        # Compression must actually shrink the output.
        assert uncompressed > compressed
Пример #22
0
    def test_warnings_errors(self):
        """stash() warns on a 5-D array and raises TypeError for 1-D input."""
        # `stash`/`unstash` appear to read the module-global `e` as their
        # data source — TODO confirm against the stash implementation.
        global e
        e = np.zeros((2, 2, 2, 2, 2))  # 5-D: expected to trigger the warning
        with ensure_clean() as path:
            with warnings.catch_warnings(record=True) as w:
                warnings.simplefilter('always')
                stash(path, verbose=False)
                vault = unstash(path, verbose=False)
                # Exactly one UnsupportedDimensionWarning expected.
                self.assertTrue(len(w) == 1)
                self.assertTrue(issubclass(w[-1].category, UnsupportedDimensionWarning))

        e = np.zeros((2,))  # 1-D frame input is rejected outright
        with ensure_clean() as path:
            self.assertRaises(TypeError, stash, path, frame=[e], verbose=False)
        del e
Пример #23
0
    def _check_extension_indexlabels(self, ext):
        """Shared checker: Excel round-trip of `index_label` variants for
        the given file extension."""
        path = "__tmp_to_excel_from_excel_indexlabels__." + ext

        with ensure_clean(path) as path:

            # NOTE(review): chained assignment — presumably intended to
            # write NaN into self.frame; confirm under copy-on-write.
            self.frame["A"][:5] = nan

            # Exercise the main to_excel argument combinations.
            self.frame.to_excel(path, "test1")
            self.frame.to_excel(path, "test1", cols=["A", "B"])
            self.frame.to_excel(path, "test1", header=False)
            self.frame.to_excel(path, "test1", index=False)

            # test index_label as a one-element list
            frame = DataFrame(np.random.randn(10, 2)) >= 0
            frame.to_excel(path, "test1", index_label=["test"])
            reader = ExcelFile(path)
            recons = reader.parse("test1", index_col=0).astype(np.int64)
            frame.index.names = ["test"]
            self.assertEqual(frame.index.names, recons.index.names)

            # index_label with more entries than index levels: the parsed
            # index is still expected to carry only ["test"].
            frame = DataFrame(np.random.randn(10, 2)) >= 0
            frame.to_excel(path, "test1", index_label=["test", "dummy", "dummy2"])
            reader = ExcelFile(path)
            recons = reader.parse("test1", index_col=0).astype(np.int64)
            frame.index.names = ["test"]
            self.assertEqual(frame.index.names, recons.index.names)

            # index_label as a bare string
            frame = DataFrame(np.random.randn(10, 2)) >= 0
            frame.to_excel(path, "test1", index_label="test")
            reader = ExcelFile(path)
            recons = reader.parse("test1", index_col=0).astype(np.int64)
            frame.index.names = ["test"]
            self.assertEqual(frame.index.names, recons.index.names)

        # test index_labels in same row as column names
        path = "%s.xls" % tm.rands(10)

        with ensure_clean(path) as path:

            self.frame.to_excel(path, "test1", cols=["A", "B", "C", "D"], index=False)
            # take 'A' and 'B' as indexes (they are in same row as cols 'C',
            # 'D')
            df = self.frame.copy()
            df = df.set_index(["A", "B"])

            reader = ExcelFile(path)
            recons = reader.parse("test1", index_col=[0, 1])
            tm.assert_frame_equal(df, recons)
Пример #24
0
    def test_categorical_warnings_and_errors(self):
        """Stata export of categoricals: overlong labels raise ValueError,
        and mixed (non-string) labels produce exactly one warning.

        Bug fix: the second ``to_stata`` call reused ``path`` after its
        ``tm.ensure_clean`` context had already exited (the temp file was
        gone and the name was stale); it now gets its own context.
        """
        # Error for labels too long
        original = pd.DataFrame.from_records(
            [['a' * 10000],
             ['b' * 10000],
             ['c' * 10000],
             ['d' * 10000]],
            columns=['Too_long'])

        original = pd.concat([original[col].astype('category')
                              for col in original], axis=1)
        with tm.ensure_clean() as path:
            tm.assertRaises(ValueError, original.to_stata, path)

        # Warning for non-string labels (str and int mixed in one column)
        original = pd.DataFrame.from_records(
            [['a'],
             ['b'],
             ['c'],
             ['d'],
             [1]],
            columns=['Too_long'])
        original = pd.concat([original[col].astype('category')
                              for col in original], axis=1)

        with tm.ensure_clean() as path:
            with warnings.catch_warnings(record=True) as w:
                original.to_stata(path)
                tm.assert_equal(len(w), 1)  # should get a warning for mixed content
Пример #25
0
 def test_get_store(self):
     """Deprecated pd.get_store must still return a working HDF store
     while emitting a FutureWarning."""
     pytest.importorskip('tables')  # HDF5 backend required
     with tm.ensure_clean() as path:
         with tm.assert_produces_warning(FutureWarning,
                                         check_stacklevel=False):
             s = pd.get_store(path)
             s.close()
Пример #26
0
        def roundtrip(df, header=True, parser_hdr=0):
            # Write `df` to a temporary Excel file and parse the first
            # sheet back, returning the round-tripped frame.
            # `header` controls writing; `parser_hdr` is the header row
            # used when parsing back.
            with ensure_clean(self.ext) as path:
                df.to_excel(path, header=header, merge_cells=self.merge_cells)
                xf = pd.ExcelFile(path)
                res = xf.parse(xf.sheet_names[0], header=parser_hdr)
                return res
Пример #27
0
def test_lines_with_compression(compression):
    with tm.ensure_clean() as path:
        df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}')
        df.to_json(path, orient='records', lines=True, compression=compression)
        roundtripped_df = pd.read_json(path, lines=True,
                                       compression=compression)
        assert_frame_equal(df, roundtripped_df)
Пример #28
0
def _check_plot_works(f, freq=None, series=None, *args, **kwargs):
    # Smoke-test a plotting callable: draw it twice on a shared figure,
    # check it returns something, optionally check the axis frequency,
    # and finally make sure the figure can be saved to disk.
    import matplotlib.pyplot as plt

    fig = plt.gcf()
    plt.clf()
    ax = fig.add_subplot(211)
    # Remember the axis the caller asked to draw on (and its freq, if any).
    orig_ax = kwargs.pop('ax', plt.gca())
    orig_axfreq = getattr(orig_ax, 'freq', None)

    ret = f(*args, **kwargs)
    assert(ret is not None)  # do something more intelligent

    ax = kwargs.pop('ax', plt.gca())
    if series is not None:
        dfreq = series.index.freq
        if isinstance(dfreq, DateOffset):
            dfreq = dfreq.rule_code
        if orig_axfreq is None:
            # only check when the caller's axis had no freq of its own
            assert(ax.freq == dfreq)

    if freq is not None and orig_axfreq is None:
        assert(ax.freq == freq)

    ax = fig.add_subplot(212)
    try:
        # second draw on an explicit axis; failures are tolerated since
        # some plot kinds do not accept ``ax``
        kwargs['ax'] = ax
        ret = f(*args, **kwargs)
        assert(ret is not None)  # do something more intelligent
    except Exception:
        pass

    with ensure_clean() as path:
        plt.savefig(path)
Пример #29
0
 def test_read_empty_dta(self):
     # GH 7369: a zero-observation frame must survive a Stata round trip.
     empty_ds = DataFrame(columns=['unit'])
     with tm.ensure_clean() as path:
         empty_ds.to_stata(path, write_index=False)
         reloaded = read_stata(path)
         tm.assert_frame_equal(empty_ds, reloaded)
Пример #30
0
    def test_excel_deprecated_options(self):
        # ``cols=`` is deprecated in favour of ``columns=``: the former
        # must raise a FutureWarning, the latter must stay silent.
        subset = ['A', 'B']
        with ensure_clean(self.ext) as path:
            with tm.assert_produces_warning(FutureWarning):
                self.frame.to_excel(path, 'test1', cols=subset)

            with tm.assert_produces_warning(False):
                self.frame.to_excel(path, 'test1', columns=subset)
Пример #31
0
    def _check_excel_multiindex_dates(self, ext):
        """Round-trip a (dates, counter) MultiIndex through Excel.

        Writes ``self.tsframe`` once with explicit ``index_label`` and
        once letting the labels be inferred; both must reconstruct.
        """
        path = '__tmp_to_excel_multiindex_dates__' + ext + '__.' + ext

        # try multiindex with dates
        tsframe = self.tsframe
        old_index = tsframe.index
        new_index = [old_index, np.arange(len(old_index))]
        tsframe.index = MultiIndex.from_arrays(new_index)

        with ensure_clean(path) as path:
            tsframe.to_excel(path, 'test1', index_label=['time', 'foo'])
            reader = ExcelFile(path)
            recons = reader.parse('test1', index_col=[0, 1])

            tm.assert_frame_equal(tsframe, recons, check_names=False)
            # FIX: ``assertEquals`` is a deprecated unittest alias
            # (removed in Python 3.12); use ``assertEqual``.
            self.assertEqual(recons.index.names, ['time', 'foo'])

            # infer index
            tsframe.to_excel(path, 'test1')
            reader = ExcelFile(path)
            recons = reader.parse('test1')
            tm.assert_frame_equal(tsframe, recons)

            self.tsframe.index = old_index  # needed if setUP becomes classmethod
Пример #32
0
    def test_to_csv_mixed(self):
        """Round-trip a frame mixing float/int/bool/object/datetime
        columns through CSV, forcing dtypes on re-read."""

        def create_cols(name):
            # five numbered column names sharing a dtype prefix
            return ["%s%03d" % (name, i) for i in range(5)]

        df_float = DataFrame(np.random.randn(
            100, 5), dtype='float64', columns=create_cols('float'))
        df_int = DataFrame(np.random.randn(100, 5),
                           dtype='int64', columns=create_cols('int'))
        df_bool = DataFrame(True, index=df_float.index,
                            columns=create_cols('bool'))
        df_object = DataFrame('foo', index=df_float.index,
                              columns=create_cols('object'))
        df_dt = DataFrame(Timestamp('20010101'),
                          index=df_float.index, columns=create_cols('date'))

        # add in some nans
        df_float.loc[30:50, 1:3] = np.nan

        # ## this is a bug in read_csv right now ####
        # df_dt.loc[30:50,1:3] = np.nan

        df = pd.concat([df_float, df_int, df_bool, df_object, df_dt], axis=1)

        # dtype map for re-reading.
        # FIX: ``np.bool`` and ``np.object`` are deprecated aliases
        # removed in NumPy 1.24; the builtins are what they aliased.
        dtypes = dict()
        for n, dtype in [('float', np.float64), ('int', np.int64),
                         ('bool', bool), ('object', object)]:
            for c in create_cols(n):
                dtypes[c] = dtype

        with ensure_clean() as filename:
            df.to_csv(filename)
            rs = read_csv(filename, index_col=0, dtype=dtypes,
                          parse_dates=create_cols('date'))
            assert_frame_equal(rs, df)
Пример #33
0
    def test_to_csv_from_csv2(self):
        """CSV round trips: duplicate row labels, a three-level
        MultiIndex, and header aliases; bad alias counts must raise."""

        with ensure_clean("__tmp_to_csv_from_csv2__") as path:

            # duplicate index labels survive a round trip
            dup = DataFrame(np.random.randn(3, 3),
                            index=["a", "a", "b"],
                            columns=["x", "y", "z"])
            dup.to_csv(path)
            assert_frame_equal(self.read_csv(path), dup)

            # three-level MultiIndex round trip
            midx = MultiIndex.from_tuples([("A", 1, 2), ("A", 1, 2),
                                           ("B", 1, 2)])
            mi_df = DataFrame(np.random.randn(3, 3),
                              index=midx,
                              columns=["x", "y", "z"])
            mi_df.to_csv(path)
            reloaded = self.read_csv(path,
                                     index_col=[0, 1, 2],
                                     parse_dates=False)
            assert_frame_equal(reloaded, mi_df, check_names=False)

            # header= may rename the columns on write
            col_aliases = Index(["AA", "X", "Y", "Z"])
            self.frame2.to_csv(path, header=col_aliases)
            expected = self.frame2.copy()
            expected.columns = col_aliases
            assert_frame_equal(expected, self.read_csv(path))

            # alias count must match the column count
            msg = "Writing 4 cols but got 2 aliases"
            with pytest.raises(ValueError, match=msg):
                self.frame2.to_csv(path, header=["AA", "X"])
Пример #34
0
    def test_read_excel_parse_dates(self, ext):
        # see gh-11544, gh-12051
        # FIX: ``pd.datetime`` is a removed alias of the stdlib class;
        # import ``datetime.datetime`` directly instead.
        from datetime import datetime

        df = DataFrame({
            "col": [1, 2, 3],
            "date_strings": pd.date_range("2012-01-01", periods=3)
        })
        df2 = df.copy()
        df2["date_strings"] = df2["date_strings"].dt.strftime("%m/%d/%Y")

        with tm.ensure_clean(ext) as pth:
            df2.to_excel(pth)

            # without parse_dates the strings come back as plain strings
            res = pd.read_excel(pth, index_col=0)
            tm.assert_frame_equal(df2, res)

            res = pd.read_excel(pth, parse_dates=["date_strings"], index_col=0)
            tm.assert_frame_equal(df, res)

            # a custom date_parser must also reconstruct the dates
            date_parser = lambda x: datetime.strptime(x, "%m/%d/%Y")
            res = pd.read_excel(pth,
                                parse_dates=["date_strings"],
                                date_parser=date_parser,
                                index_col=0)
            tm.assert_frame_equal(df, res)
Пример #35
0
    def test_read_write_dta11(self):
        # Illegal Stata column names (non-ASCII, leading digit, too
        # long) are sanitised on write, with a single warning emitted.
        original = DataFrame([(1, 2, 3, 4)],
                             columns=[
                                 'good',
                                 compat.u('b\u00E4d'), '8number',
                                 'astringwithmorethan32characters______'
                             ])
        formatted = DataFrame([(1, 2, 3, 4)],
                              columns=[
                                  'good', 'b_d', '_8number',
                                  'astringwithmorethan32characters_'
                              ])
        formatted.index.name = 'index'
        formatted = formatted.astype(np.int32)

        with tm.ensure_clean() as path:
            with warnings.catch_warnings(record=True) as w:
                original.to_stata(path, None)
                # should get a warning for that format.
            # exactly one warning, about the renamed columns
            tm.assert_equal(len(w), 1)

            result = self.read_dta(path)
            tm.assert_frame_equal(result.set_index('index'), formatted)
Пример #36
0
    def _check_extension_sheets(self, ext):
        # Exercise single-sheet writes with several option combinations,
        # then write two frames to separate sheets and read each back.
        path = '__tmp_to_excel_from_excel_sheets__.' + ext

        with ensure_clean(path) as path:
            self.frame['A'][:5] = nan

            for extra in ({}, {'cols': ['A', 'B']},
                          {'header': False}, {'index': False}):
                self.frame.to_excel(path, 'test1', **extra)

            # Test writing to separate sheets
            writer = ExcelWriter(path)
            self.frame.to_excel(writer, 'test1')
            self.tsframe.to_excel(writer, 'test2')
            writer.save()

            book = ExcelFile(path)
            tm.assert_frame_equal(self.frame,
                                  book.parse('test1', index_col=0))
            tm.assert_frame_equal(self.tsframe,
                                  book.parse('test2', index_col=0))
            np.testing.assert_equal(2, len(book.sheet_names))
            np.testing.assert_equal('test1', book.sheet_names[0])
            np.testing.assert_equal('test2', book.sheet_names[1])
Пример #37
0
    def test_categorical_writing(self):
        # Categorical columns written to Stata come back as categoricals;
        # mixed-type or unlabeled categories are stringified on the way.
        original = DataFrame.from_records(
            [["one", "ten", "one", "one", "one", 1],
             ["two", "nine", "two", "two", "two", 2],
             ["three", "eight", "three", "three", "three", 3],
             ["four", "seven", 4, "four", "four", 4],
             ["five", "six", 5, np.nan, "five", 5],
             ["six", "five", 6, np.nan, "six", 6],
             ["seven", "four", 7, np.nan, "seven", 7],
             ["eight", "three", 8, np.nan, "eight", 8],
             ["nine", "two", 9, np.nan, "nine", 9],
             ["ten", "one", "ten", np.nan, "ten", 10]],
            columns=[
                'fully_labeled', 'fully_labeled2', 'incompletely_labeled',
                'labeled_with_missings', 'float_labelled', 'unlabeled'
            ])
        expected = original.copy()

        # these are all categoricals
        original = pd.concat(
            [original[col].astype('category') for col in original], axis=1)

        # mixed-type category values round-trip as their str() form
        expected['incompletely_labeled'] = expected[
            'incompletely_labeled'].apply(str)
        expected['unlabeled'] = expected['unlabeled'].apply(str)
        expected = pd.concat(
            [expected[col].astype('category') for col in expected], axis=1)
        expected.index.name = 'index'

        with tm.ensure_clean() as path:
            with warnings.catch_warnings(record=True) as w:
                # Silence warnings
                original.to_stata(path)
                written_and_read_again = self.read_dta(path)
                tm.assert_frame_equal(
                    written_and_read_again.set_index('index'), expected)
Пример #38
0
    def test_sheets(self):
        # Two frames written to separate sheets via ExcelWriter must be
        # readable back individually by sheet name.
        _skip_if_no_xlrd()

        with ensure_clean(self.ext) as path:
            self.frame['A'][:5] = nan

            self.frame.to_excel(path, 'test1')
            self.frame.to_excel(path, 'test1', cols=['A', 'B'])
            self.frame.to_excel(path, 'test1', header=False)
            self.frame.to_excel(path, 'test1', index=False)

            # Test writing to separate sheets
            writer = ExcelWriter(path)
            self.frame.to_excel(writer, 'test1')
            self.tsframe.to_excel(writer, 'test2')
            writer.save()

            book = ExcelFile(path)
            tm.assert_frame_equal(self.frame,
                                  book.parse('test1', index_col=0))
            tm.assert_frame_equal(self.tsframe,
                                  book.parse('test2', index_col=0))
            np.testing.assert_equal(2, len(book.sheet_names))
            np.testing.assert_equal('test1', book.sheet_names[0])
            np.testing.assert_equal('test2', book.sheet_names[1])
Пример #39
0
    def test_to_csv_from_csv2(self):
        # Duplicate row labels, a three-level MultiIndex and header
        # aliases must survive a CSV round trip; a short alias list
        # must raise.
        with ensure_clean('__tmp_to_csv_from_csv2__') as path:

            dup = DataFrame(np.random.randn(3, 3),
                            index=['a', 'a', 'b'],
                            columns=['x', 'y', 'z'])
            dup.to_csv(path)
            assert_frame_equal(self.read_csv(path), dup)

            midx = MultiIndex.from_tuples([('A', 1, 2), ('A', 1, 2),
                                           ('B', 1, 2)])
            mi_df = DataFrame(np.random.randn(3, 3),
                              index=midx,
                              columns=['x', 'y', 'z'])
            mi_df.to_csv(path)
            reloaded = self.read_csv(path,
                                     index_col=[0, 1, 2],
                                     parse_dates=False)
            assert_frame_equal(reloaded, mi_df, check_names=False)

            # column aliases
            col_aliases = Index(['AA', 'X', 'Y', 'Z'])
            self.frame2.to_csv(path, header=col_aliases)
            expected = self.frame2.copy()
            expected.columns = col_aliases
            assert_frame_equal(expected, self.read_csv(path))

            msg = "Writing 4 cols but got 2 aliases"
            with pytest.raises(ValueError, match=msg):
                self.frame2.to_csv(path, header=['AA', 'X'])
Пример #40
0
    def test_unsupported_dtype(self):
        # read_csv must raise TypeError for dtypes it cannot honour:
        # datetime64/timedelta64 passed via dtype=, fixed-width unicode.
        # FIX: converted legacy ``self.assertRaises`` calls to
        # ``pytest.raises`` for consistency with the rest of this suite.
        df = DataFrame(np.random.rand(5, 2), columns=list(
            'AB'), index=['1A', '1B', '1C', '1D', '1E'])

        with tm.ensure_clean('__unsupported_dtype__.csv') as path:
            df.to_csv(path)

            # valid but we don't support it (date)
            with pytest.raises(TypeError):
                self.read_csv(path,
                              dtype={'A': 'datetime64', 'B': 'float64'},
                              index_col=0)
            with pytest.raises(TypeError):
                self.read_csv(path,
                              dtype={'A': 'datetime64', 'B': 'float64'},
                              index_col=0, parse_dates=['B'])

            # valid but we don't support it
            with pytest.raises(TypeError):
                self.read_csv(path,
                              dtype={'A': 'timedelta64', 'B': 'float64'},
                              index_col=0)

            # valid but unsupported - fixed width unicode string
            with pytest.raises(TypeError):
                self.read_csv(path, dtype={'A': 'U8'}, index_col=0)
Пример #41
0
    def test_to_excel_multiindex_no_write_index(self):
        # GH 5616: a MultiIndex written with index=False must round-trip
        # back to the original, index-free frame.
        _skip_if_no_xlrd()

        # Initial non-MI frame.
        frame1 = pd.DataFrame({'a': [10, 20], 'b': [30, 40], 'c': [50, 60]})

        # Same data, now carrying a MultiIndex that should be dropped.
        frame2 = frame1.copy()
        frame2.index = pd.MultiIndex.from_tuples([(70, 80), (90, 100)])

        with ensure_clean(self.ext) as path:
            # Write out to Excel without the index.
            frame2.to_excel(path, 'test1', index=False)

            # Read it back in.
            reader = ExcelFile(path)
            frame3 = reader.parse('test1')

            # Test that it is the same as the initial frame.
            tm.assert_frame_equal(frame1, frame3)
Пример #42
0
    def set_engine_and_path(self, request, engine, ext):
        """Fixture to set engine and open file for use in each test case

        Rather than requiring `engine=...` to be provided explicitly as an
        argument in each test, this fixture sets a global option to dictate
        which engine should be used to write Excel files. After executing
        the test it rolls back said change to the global option.

        It also uses a context manager to open a temporary excel file for
        the function to write to, accessible via `self.path`

        Notes
        -----
        This fixture will run as part of each test method defined in the
        class and any subclasses, on account of the `autouse=True`
        argument
        """
        option_name = 'io.excel.{ext}.writer'.format(ext=ext.strip('.'))
        prev_engine = get_option(option_name)
        set_option(option_name, engine)
        try:
            with ensure_clean(ext) as path:
                self.path = path
                yield
        finally:
            # FIX: roll back even when the test body raises; previously a
            # failing test leaked the engine override into later tests.
            set_option(option_name, prev_engine)  # Roll back option change
Пример #43
0
    def _check_extension(self, ext):
        # Round-trip self.frame through an Excel file of the given
        # extension, including na_rep handling (GH 3611).
        path = '__tmp_to_excel_from_excel__.' + ext

        with ensure_clean(path) as path:
            self.frame['A'][:5] = nan

            self.frame.to_excel(path, 'test1')
            self.frame.to_excel(path, 'test1', cols=['A', 'B'])
            self.frame.to_excel(path, 'test1', header=False)
            self.frame.to_excel(path, 'test1', index=False)

            # plain round trip
            self.frame.to_excel(path, 'test1')
            tm.assert_frame_equal(self.frame,
                                  read_excel(path, 'test1', index_col=0))

            # round trip without the index column
            self.frame.to_excel(path, 'test1', index=False)
            recons = read_excel(path, 'test1', index_col=None)
            recons.index = self.frame.index
            tm.assert_frame_equal(self.frame, recons)

            # NaN marker written as 'NA'
            self.frame.to_excel(path, 'test1', na_rep='NA')
            tm.assert_frame_equal(
                self.frame,
                read_excel(path, 'test1', index_col=0, na_values=['NA']))

            # GH 3611: numeric NaN markers, matched as string then number
            self.frame.to_excel(path, 'test1', na_rep='88')
            tm.assert_frame_equal(
                self.frame,
                read_excel(path, 'test1', index_col=0, na_values=['88']))

            self.frame.to_excel(path, 'test1', na_rep='88')
            tm.assert_frame_equal(
                self.frame,
                read_excel(path, 'test1', index_col=0,
                           na_values=[88, 88.0]))
Пример #44
0
 def test_path(self):
     # ``to_json``/``read_json`` accept a plain file path argument.
     frames = [self.frame, self.frame2, self.intframe, self.tsframe,
               self.mixed_frame]
     with ensure_clean('test.json') as path:
         for df in frames:
             df.to_json(path)
             read_json(path)
Пример #45
0
 def test_write_explicit_bad(self, compression, get_random_path):
     # An unknown compression scheme must raise a clear ValueError.
     # FIX: ``tm.assertRaisesRegexp`` is a removed legacy alias; use
     # ``pytest.raises(..., match=...)`` like the rest of the suite.
     with pytest.raises(ValueError, match="Unrecognized compression type"):
         with tm.ensure_clean(get_random_path) as path:
             df = tm.makeDataFrame()
             df.to_pickle(path, compression=compression)
Пример #46
0
 def roundtrip(s, encoding='latin-1'):
     # Write the series as JSON with the given encoding and check it
     # survives the round trip (category ordering aside).
     with ensure_clean('test.json') as path:
         s.to_json(path, encoding=encoding)
         restored = read_json(path, encoding=encoding)
         assert_series_equal(s, restored, check_categorical=False)
Пример #47
0
 def test_stata_doc_examples(self):
     with tm.ensure_clean() as path:
         df = DataFrame(np.random.randn(10, 2), columns=list('AB'))
         df.to_stata(path)
Пример #48
0
 def test_excelwriter_fspath(self):
     # ExcelWriter implements the os.PathLike protocol.
     with tm.ensure_clean("foo.xlsx") as path:
         writer = ExcelWriter(path)
         assert str(path) == os.fspath(writer)
Пример #49
0
    def test_roundtrip_indexlabels(self):
        """``index_label`` handling when writing to Excel.

        Whatever length of ``index_label`` is given for a single-level
        index, only the first label survives the round trip.
        """
        _skip_if_no_xlrd()

        def _reread(path):
            # parse sheet 'test1' back, honouring merged headers
            reader = ExcelFile(path)
            return reader.parse('test1',
                                index_col=0,
                                has_index_names=self.merge_cells).astype(
                                    np.int64)

        with ensure_clean(self.ext) as path:

            self.frame['A'][:5] = nan

            self.frame.to_excel(path, 'test1')
            self.frame.to_excel(path, 'test1', cols=['A', 'B'])
            self.frame.to_excel(path, 'test1', header=False)
            self.frame.to_excel(path, 'test1', index=False)

            # test index_label: one label, three labels, scalar label
            for index_label in (['test'], ['test', 'dummy', 'dummy2'],
                                'test'):
                frame = (DataFrame(np.random.randn(10, 2)) >= 0)
                frame.to_excel(path,
                               'test1',
                               index_label=index_label,
                               merge_cells=self.merge_cells)
                recons = _reread(path)
                frame.index.names = ['test']
                # FIX: the third case used ``assertAlmostEqual``, which is
                # the wrong assertion for name equality; use assertEqual
                # as the sibling checks do.
                self.assertEqual(frame.index.names, recons.index.names)

        with ensure_clean(self.ext) as path:

            self.frame.to_excel(path,
                                'test1',
                                cols=['A', 'B', 'C', 'D'],
                                index=False,
                                merge_cells=self.merge_cells)
            # take 'A' and 'B' as indexes (same row as cols 'C', 'D')
            df = self.frame.copy()
            df = df.set_index(['A', 'B'])

            reader = ExcelFile(path)
            recons = reader.parse('test1', index_col=[0, 1])
            tm.assert_frame_equal(df, recons, check_less_precise=True)
Пример #50
0
    def test_to_csv_multiindex(self):
        # Round-trip frames with MultiIndex rows and/or columns through
        # CSV, covering index_label, header depths and error cases.

        frame = self.frame
        old_index = frame.index
        arrays = np.arange(len(old_index) * 2).reshape(2, -1)
        new_index = MultiIndex.from_arrays(arrays, names=["first", "second"])
        frame.index = new_index

        with ensure_clean("__tmp_to_csv_multiindex__") as path:

            frame.to_csv(path, header=False)
            frame.to_csv(path, columns=["A", "B"])

            # round trip
            frame.to_csv(path)

            df = self.read_csv(path, index_col=[0, 1], parse_dates=False)

            # TODO to_csv drops column name
            assert_frame_equal(frame, df, check_names=False)
            assert frame.index.names == df.index.names

            # needed if setUp becomes a class method
            self.frame.index = old_index

            # try multiindex with dates
            tsframe = self.tsframe
            old_index = tsframe.index
            new_index = [old_index, np.arange(len(old_index))]
            tsframe.index = MultiIndex.from_arrays(new_index)

            tsframe.to_csv(path, index_label=["time", "foo"])
            recons = self.read_csv(path, index_col=[0, 1])

            # TODO to_csv drops column name
            assert_frame_equal(tsframe, recons, check_names=False)

            # do not load index
            tsframe.to_csv(path)
            recons = self.read_csv(path, index_col=None)
            assert len(recons.columns) == len(tsframe.columns) + 2

            # no index
            tsframe.to_csv(path, index=False)
            recons = self.read_csv(path, index_col=None)
            assert_almost_equal(recons.values, self.tsframe.values)

            # needed if setUp becomes class method
            self.tsframe.index = old_index

        with ensure_clean("__tmp_to_csv_multiindex__") as path:
            # GH3571, GH1651, GH3141

            def _make_frame(names=None):
                # small frame with a two-level column MultiIndex;
                # names=True installs the standard level names
                if names is True:
                    names = ["first", "second"]
                return DataFrame(
                    np.random.randint(0, 10, size=(3, 3)),
                    columns=MultiIndex.from_tuples(
                        [("bah", "foo"), ("bah", "bar"), ("ban", "baz")], names=names
                    ),
                    dtype="int64",
                )

            # column & index are multi-index
            df = mkdf(5, 3, r_idx_nlevels=2, c_idx_nlevels=4)
            df.to_csv(path)
            result = read_csv(path, header=[0, 1, 2, 3], index_col=[0, 1])
            assert_frame_equal(df, result)

            # column is mi
            df = mkdf(5, 3, r_idx_nlevels=1, c_idx_nlevels=4)
            df.to_csv(path)
            result = read_csv(path, header=[0, 1, 2, 3], index_col=0)
            assert_frame_equal(df, result)

            # dup column names?
            df = mkdf(5, 3, r_idx_nlevels=3, c_idx_nlevels=4)
            df.to_csv(path)
            result = read_csv(path, header=[0, 1, 2, 3], index_col=[0, 1, 2])
            assert_frame_equal(df, result)

            # writing with no index
            df = _make_frame()
            df.to_csv(path, index=False)
            result = read_csv(path, header=[0, 1])
            assert_frame_equal(df, result)

            # we lose the names here
            df = _make_frame(True)
            df.to_csv(path, index=False)
            result = read_csv(path, header=[0, 1])
            assert com._all_none(*result.columns.names)
            result.columns.names = df.columns.names
            assert_frame_equal(df, result)

            # whatsnew example
            df = _make_frame()
            df.to_csv(path)
            result = read_csv(path, header=[0, 1], index_col=[0])
            assert_frame_equal(df, result)

            df = _make_frame(True)
            df.to_csv(path)
            result = read_csv(path, header=[0, 1], index_col=[0])
            assert_frame_equal(df, result)

            # invalid options
            df = _make_frame(True)
            df.to_csv(path)

            # asking for more header rows than the file has must raise
            for i in [6, 7]:
                msg = "len of {i}, but only 5 lines in file".format(i=i)
                with pytest.raises(ParserError, match=msg):
                    read_csv(path, header=list(range(i)), index_col=0)

            # write with cols
            msg = "cannot specify cols with a MultiIndex"
            with pytest.raises(TypeError, match=msg):
                df.to_csv(path, columns=["foo", "bar"])

        with ensure_clean("__tmp_to_csv_multiindex__") as path:
            # empty
            tsframe[:0].to_csv(path)
            recons = self.read_csv(path)

            exp = tsframe[:0]
            exp.index = []

            tm.assert_index_equal(recons.columns, exp.columns)
            assert len(recons) == 0
Пример #51
0
 def check_error_on_write(self, df, engine, exc):
     # Writing ``df`` via ``engine`` must raise ``exc``.
     with tm.ensure_clean() as path, pytest.raises(exc):
         to_parquet(df, path, engine, compression=None)
Пример #52
0
    def _pickle_roundtrip(self, obj):

        with ensure_clean() as path:
            obj.to_pickle(path)
            unpickled = pd.read_pickle(path)
            return unpickled
Пример #53
0
        def _do_test(
            df, r_dtype=None, c_dtype=None, rnlvl=None, cnlvl=None, dupe_col=False
        ):
            # Round-trip ``df`` through CSV and compare, after coercing
            # index/columns back to the requested dtype family:
            #   'u' unicode, 'dt' datetime, 'p' period, else a numpy kind
            # rnlvl/cnlvl give MultiIndex depths; dupe_col restores
            # duplicated column labels that read_csv disambiguates.

            kwargs = dict(parse_dates=False)
            if cnlvl:
                if rnlvl is not None:
                    kwargs["index_col"] = list(range(rnlvl))
                kwargs["header"] = list(range(cnlvl))

                with ensure_clean("__tmp_to_csv_moar__") as path:
                    df.to_csv(path, encoding="utf8", chunksize=chunksize)
                    recons = self.read_csv(path, **kwargs)
            else:
                kwargs["header"] = 0

                with ensure_clean("__tmp_to_csv_moar__") as path:
                    df.to_csv(path, encoding="utf8", chunksize=chunksize)
                    recons = self.read_csv(path, **kwargs)

            def _to_uni(x):
                # decode bytes to text; pass str through untouched
                if not isinstance(x, str):
                    return x.decode("utf8")
                return x

            if dupe_col:
                # read_Csv disambiguates the columns by
                # labeling them dupe.1,dupe.2, etc'. monkey patch columns
                recons.columns = df.columns
            if rnlvl and not cnlvl:
                # rebuild the row MultiIndex from the leading data columns
                delta_lvl = [recons.iloc[:, i].values for i in range(rnlvl - 1)]
                ix = MultiIndex.from_arrays([list(recons.index)] + delta_lvl)
                recons.index = ix
                recons = recons.iloc[:, rnlvl - 1 :]

            # everything except ints/floats comes back as object dtype
            type_map = dict(i="i", f="f", s="O", u="O", dt="O", p="O")
            if r_dtype:
                if r_dtype == "u":  # unicode
                    r_dtype = "O"
                    recons.index = np.array(
                        [_to_uni(label) for label in recons.index], dtype=r_dtype
                    )
                    df.index = np.array(
                        [_to_uni(label) for label in df.index], dtype=r_dtype
                    )
                elif r_dtype == "dt":  # unicode
                    r_dtype = "O"
                    recons.index = np.array(
                        [Timestamp(label) for label in recons.index], dtype=r_dtype
                    )
                    df.index = np.array(
                        [Timestamp(label) for label in df.index], dtype=r_dtype
                    )
                elif r_dtype == "p":
                    # periods: parse strings back, compare as Timestamps
                    r_dtype = "O"
                    idx_list = to_datetime(recons.index)
                    recons.index = np.array(
                        [Timestamp(label) for label in idx_list], dtype=r_dtype
                    )
                    df.index = np.array(
                        list(map(Timestamp, df.index.to_timestamp())), dtype=r_dtype
                    )
                else:
                    r_dtype = type_map.get(r_dtype)
                    recons.index = np.array(recons.index, dtype=r_dtype)
                    df.index = np.array(df.index, dtype=r_dtype)
            if c_dtype:
                if c_dtype == "u":
                    c_dtype = "O"
                    recons.columns = np.array(
                        [_to_uni(label) for label in recons.columns], dtype=c_dtype
                    )
                    df.columns = np.array(
                        [_to_uni(label) for label in df.columns], dtype=c_dtype
                    )
                elif c_dtype == "dt":
                    c_dtype = "O"
                    recons.columns = np.array(
                        [Timestamp(label) for label in recons.columns], dtype=c_dtype
                    )
                    df.columns = np.array(
                        [Timestamp(label) for label in df.columns], dtype=c_dtype
                    )
                elif c_dtype == "p":
                    c_dtype = "O"
                    col_list = to_datetime(recons.columns)
                    recons.columns = np.array(
                        [Timestamp(label) for label in col_list], dtype=c_dtype
                    )
                    col_list = df.columns.to_timestamp()
                    df.columns = np.array(
                        [Timestamp(label) for label in col_list], dtype=c_dtype
                    )
                else:
                    c_dtype = type_map.get(c_dtype)
                    recons.columns = np.array(recons.columns, dtype=c_dtype)
                    df.columns = np.array(df.columns, dtype=c_dtype)

            assert_frame_equal(df, recons, check_names=False, check_less_precise=True)
Пример #54
0
 def test_read(self, protocol, get_random_path):
     # A pickle written with each supported protocol reads back equal.
     with tm.ensure_clean(get_random_path) as path:
         original = tm.makeDataFrame()
         original.to_pickle(path, protocol=protocol)
         tm.assert_frame_equal(original, pd.read_pickle(path))
Пример #55
0
def path(ext):
    """
    Fixture to open file for use in each test case.

    Yields a temporary file path ending in *ext*; the file is removed
    again by ``tm.ensure_clean`` once the test finishes.
    """
    with tm.ensure_clean(ext) as tmp:
        yield tmp
# Example #56
        def _do_test(df, r_dtype=None, c_dtype=None,
                     rnlvl=None, cnlvl=None, dupe_col=False):
            """
            Round-trip ``df`` through ``to_csv``/``read_csv`` and assert
            the reconstructed frame equals the original.

            Parameters
            ----------
            df : DataFrame
                Frame to write out and read back.
            r_dtype, c_dtype : str, optional
                Dtype code for the index / columns: 'i', 'f', 's',
                'u' (unicode), 'dt' (datetime), 'p' (period). Both sides
                are coerced to a comparable dtype before comparison.
            rnlvl, cnlvl : int, optional
                Number of row / column index levels to reconstruct.
            dupe_col : bool, default False
                Whether ``df`` has duplicate column labels.
            """

            kwargs = dict(parse_dates=False)
            if cnlvl:
                # multi-level columns: tell read_csv how many header rows
                # (and, when present, index columns) to consume
                if rnlvl is not None:
                    kwargs['index_col'] = lrange(rnlvl)
                kwargs['header'] = lrange(cnlvl)

                with ensure_clean('__tmp_to_csv_moar__') as path:
                    df.to_csv(path, encoding='utf8',
                              chunksize=chunksize)
                    recons = self.read_csv(path, **kwargs)
            else:
                kwargs['header'] = 0

                with ensure_clean('__tmp_to_csv_moar__') as path:
                    df.to_csv(path, encoding='utf8', chunksize=chunksize)
                    recons = self.read_csv(path, **kwargs)

            def _to_uni(x):
                # decode bytes to unicode; pass text through unchanged
                if not isinstance(x, compat.text_type):
                    return x.decode('utf8')
                return x
            if dupe_col:
                # read_csv disambiguates duplicate columns by labeling
                # them dupe.1, dupe.2, etc.; monkey-patch the original
                # labels back so the frames compare equal
                recons.columns = df.columns
            if rnlvl and not cnlvl:
                # rebuild the row MultiIndex from the first rnlvl-1 data
                # columns, which read_csv left as ordinary columns
                delta_lvl = [recons.iloc[
                    :, i].values for i in range(rnlvl - 1)]
                ix = MultiIndex.from_arrays([list(recons.index)] + delta_lvl)
                recons.index = ix
                recons = recons.iloc[:, rnlvl - 1:]

            # dtype code -> numpy dtype char used when coercing below
            type_map = dict(i='i', f='f', s='O', u='O', dt='O', p='O')
            if r_dtype:
                if r_dtype == 'u':  # unicode
                    r_dtype = 'O'
                    recons.index = np.array(lmap(_to_uni, recons.index),
                                            dtype=r_dtype)
                    df.index = np.array(lmap(_to_uni, df.index), dtype=r_dtype)
                elif r_dtype == 'dt':  # datetime
                    r_dtype = 'O'
                    recons.index = np.array(lmap(Timestamp, recons.index),
                                            dtype=r_dtype)
                    df.index = np.array(
                        lmap(Timestamp, df.index), dtype=r_dtype)
                elif r_dtype == 'p':
                    # period: CSV stores strings, so parse both sides to
                    # Timestamps before comparing
                    r_dtype = 'O'
                    recons.index = np.array(
                        list(map(Timestamp, to_datetime(recons.index))),
                        dtype=r_dtype)
                    df.index = np.array(
                        list(map(Timestamp, df.index.to_timestamp())),
                        dtype=r_dtype)
                else:
                    r_dtype = type_map.get(r_dtype)
                    recons.index = np.array(recons.index, dtype=r_dtype)
                    df.index = np.array(df.index, dtype=r_dtype)
            if c_dtype:
                # same coercion logic as above, applied to the columns
                if c_dtype == 'u':
                    c_dtype = 'O'
                    recons.columns = np.array(lmap(_to_uni, recons.columns),
                                              dtype=c_dtype)
                    df.columns = np.array(
                        lmap(_to_uni, df.columns), dtype=c_dtype)
                elif c_dtype == 'dt':
                    c_dtype = 'O'
                    recons.columns = np.array(lmap(Timestamp, recons.columns),
                                              dtype=c_dtype)
                    df.columns = np.array(
                        lmap(Timestamp, df.columns), dtype=c_dtype)
                elif c_dtype == 'p':
                    c_dtype = 'O'
                    recons.columns = np.array(
                        lmap(Timestamp, to_datetime(recons.columns)),
                        dtype=c_dtype)
                    df.columns = np.array(
                        lmap(Timestamp, df.columns.to_timestamp()),
                        dtype=c_dtype)
                else:
                    c_dtype = type_map.get(c_dtype)
                    recons.columns = np.array(recons.columns, dtype=c_dtype)
                    df.columns = np.array(df.columns, dtype=c_dtype)

            assert_frame_equal(df, recons, check_names=False,
                               check_less_precise=True)
# Example #57
def test_styler_to_excel(engine):
    """
    Check that ``Styler.to_excel`` writes the same cell values as
    ``DataFrame.to_excel`` and that CSS styles are translated into the
    expected openpyxl cell attributes (font, border, fill, alignment,
    number format), for both the default and a custom style converter.
    """
    def style(df):
        """Return a frame of CSS strings, one styled cell per test row."""
        # XXX: RGB colors not supported in xlwt
        return DataFrame(
            [['font-weight: bold', '', ''], ['', 'color: blue', ''],
             ['', '', 'text-decoration: underline'],
             ['border-style: solid', '', ''], ['', 'font-style: italic', ''],
             ['', '', 'text-align: right'], ['background-color: red', '', ''],
             ['number-format: 0%', '', ''], ['', '', ''], ['', '', ''],
             ['', '', '']],
            index=df.index,
            columns=df.columns)

    def assert_equal_style(cell1, cell2, engine):
        """Assert two openpyxl cells carry identical style attributes."""
        if engine in ['xlsxwriter', 'openpyxl']:
            pytest.xfail(reason=("GH25351: failing on some attribute "
                                 "comparisons in {}".format(engine)))
        # XXX: should find a better way to check equality
        assert cell1.alignment.__dict__ == cell2.alignment.__dict__
        assert cell1.border.__dict__ == cell2.border.__dict__
        assert cell1.fill.__dict__ == cell2.fill.__dict__
        assert cell1.font.__dict__ == cell2.font.__dict__
        assert cell1.number_format == cell2.number_format
        assert cell1.protection.__dict__ == cell2.protection.__dict__

    def custom_converter(css):
        """Style converter that maps any non-empty CSS to a bold font."""
        # use bold iff there is custom style attached to the cell
        if css.strip(' \n;'):
            return {'font': {'bold': True}}
        return {}

    pytest.importorskip('jinja2')
    pytest.importorskip(engine)

    # Prepare spreadsheets

    df = DataFrame(np.random.randn(11, 3))
    # xlwt can only write the legacy .xls format
    with ensure_clean('.xlsx' if engine != 'xlwt' else '.xls') as path:
        writer = ExcelWriter(path, engine=engine)
        df.to_excel(writer, sheet_name='frame')
        df.style.to_excel(writer, sheet_name='unstyled')
        styled = df.style.apply(style, axis=None)
        styled.to_excel(writer, sheet_name='styled')
        ExcelFormatter(styled, style_converter=custom_converter).write(
            writer, sheet_name='custom')
        writer.save()

        if engine not in ('openpyxl', 'xlsxwriter'):
            # For other engines, we only smoke test
            return
        openpyxl = pytest.importorskip('openpyxl')
        wb = openpyxl.load_workbook(path)

        # (1) compare DataFrame.to_excel and Styler.to_excel when unstyled
        n_cells = 0
        for col1, col2 in zip(wb['frame'].columns, wb['unstyled'].columns):
            assert len(col1) == len(col2)
            for cell1, cell2 in zip(col1, col2):
                assert cell1.value == cell2.value
                assert_equal_style(cell1, cell2, engine)
                n_cells += 1

        # ensure iteration actually happened:
        assert n_cells == (11 + 1) * (3 + 1)

        # (2) check styling with default converter

        # XXX: openpyxl (as at 2.4) prefixes colors with 00, xlsxwriter with FF
        alpha = '00' if engine == 'openpyxl' else 'FF'

        n_cells = 0
        for col1, col2 in zip(wb['frame'].columns, wb['styled'].columns):
            assert len(col1) == len(col2)
            for cell1, cell2 in zip(col1, col2):
                # e.g. 'B2' -- one styled cell per row of the style frame
                ref = '%s%d' % (cell2.column, cell2.row)
                # XXX: this isn't as strong a test as ideal; we should
                #      confirm that differences are exclusive
                if ref == 'B2':
                    assert not cell1.font.bold
                    assert cell2.font.bold
                elif ref == 'C3':
                    assert cell1.font.color.rgb != cell2.font.color.rgb
                    assert cell2.font.color.rgb == alpha + '0000FF'
                elif ref == 'D4':
                    # This fails with engine=xlsxwriter due to
                    # https://bitbucket.org/openpyxl/openpyxl/issues/800
                    if engine == 'xlsxwriter' \
                       and (LooseVersion(openpyxl.__version__) <
                            LooseVersion('2.4.6')):
                        pass
                    else:
                        assert cell1.font.underline != cell2.font.underline
                        assert cell2.font.underline == 'single'
                elif ref == 'B5':
                    assert not cell1.border.left.style
                    assert (cell2.border.top.style == cell2.border.right.style
                            == cell2.border.bottom.style ==
                            cell2.border.left.style == 'medium')
                elif ref == 'C6':
                    assert not cell1.font.italic
                    assert cell2.font.italic
                elif ref == 'D7':
                    assert (cell1.alignment.horizontal !=
                            cell2.alignment.horizontal)
                    assert cell2.alignment.horizontal == 'right'
                elif ref == 'B8':
                    assert cell1.fill.fgColor.rgb != cell2.fill.fgColor.rgb
                    assert cell1.fill.patternType != cell2.fill.patternType
                    assert cell2.fill.fgColor.rgb == alpha + 'FF0000'
                    assert cell2.fill.patternType == 'solid'
                elif ref == 'B9':
                    assert cell1.number_format == 'General'
                    assert cell2.number_format == '0%'
                else:
                    assert_equal_style(cell1, cell2, engine)

                assert cell1.value == cell2.value
                n_cells += 1

        assert n_cells == (11 + 1) * (3 + 1)

        # (3) check styling with custom converter
        n_cells = 0
        for col1, col2 in zip(wb['frame'].columns, wb['custom'].columns):
            assert len(col1) == len(col2)
            for cell1, cell2 in zip(col1, col2):
                ref = '%s%d' % (cell2.column, cell2.row)
                if ref in ('B2', 'C3', 'D4', 'B5', 'C6', 'D7', 'B8', 'B9'):
                    assert not cell1.font.bold
                    assert cell2.font.bold
                else:
                    assert_equal_style(cell1, cell2, engine)

                assert cell1.value == cell2.value
                n_cells += 1

        assert n_cells == (11 + 1) * (3 + 1)
# Example #58
    def test_read_excel_multiindex_empty_level(self, ext):
        """
        Empty labels inside a column MultiIndex round-trip through Excel
        as the 'Unnamed: N_level_1' placeholder, regardless of whether the
        empty label sits in the middle or at the start of the columns.
        """
        # see gh-12453
        with tm.ensure_clean(ext) as path:

            def _roundtrip(frame, exp):
                # write, re-read with a two-row header, and compare
                frame.to_excel(path)
                result = pd.read_excel(path, header=[0, 1], index_col=0)
                tm.assert_frame_equal(result, exp)

            frame = DataFrame({
                ("One", "x"): {0: 1},
                ("Two", "X"): {0: 3},
                ("Two", "Y"): {0: 7},
                ("Zero", ""): {0: 0},
            })
            exp = DataFrame({
                ("One", "x"): {0: 1},
                ("Two", "X"): {0: 3},
                ("Two", "Y"): {0: 7},
                ("Zero", "Unnamed: 4_level_1"): {0: 0},
            })
            _roundtrip(frame, exp)

            frame = pd.DataFrame({
                ("Beg", ""): {0: 0},
                ("Middle", "x"): {0: 1},
                ("Tail", "X"): {0: 3},
                ("Tail", "Y"): {0: 7},
            })
            exp = pd.DataFrame({
                ("Beg", "Unnamed: 1_level_1"): {0: 0},
                ("Middle", "x"): {0: 1},
                ("Tail", "X"): {0: 3},
                ("Tail", "Y"): {0: 7},
            })
            _roundtrip(frame, exp)
# Example #59
    def test_to_csv_multiindex(self):
        """
        Round-trip frames with MultiIndex rows and/or columns through
        to_csv/read_csv, covering tupleize_cols compatibility, invalid
        header handling, and the empty-frame case.

        Fix: the two regex literals passed to assertRaisesRegexp used
        ``'\\['`` in non-raw strings -- an invalid escape sequence that
        raises DeprecationWarning on Python 3.6+ (and is slated to become
        a SyntaxError). They are now raw strings with identical values.
        """
        frame = self.frame
        old_index = frame.index
        arrays = np.arange(len(old_index) * 2).reshape(2, -1)
        new_index = MultiIndex.from_arrays(arrays, names=['first', 'second'])
        frame.index = new_index

        with ensure_clean('__tmp_to_csv_multiindex__') as path:

            frame.to_csv(path, header=False)
            frame.to_csv(path, columns=['A', 'B'])

            # round trip
            frame.to_csv(path)
            df = DataFrame.from_csv(path, index_col=[0, 1], parse_dates=False)

            # TODO to_csv drops column name
            assert_frame_equal(frame, df, check_names=False)
            self.assertEqual(frame.index.names, df.index.names)

            # needed if setUp becomes a classmethod
            self.frame.index = old_index

            # try multiindex with dates
            tsframe = self.tsframe
            old_index = tsframe.index
            new_index = [old_index, np.arange(len(old_index))]
            tsframe.index = MultiIndex.from_arrays(new_index)

            tsframe.to_csv(path, index_label=['time', 'foo'])
            recons = DataFrame.from_csv(path, index_col=[0, 1])
            # TODO to_csv drops column name
            assert_frame_equal(tsframe, recons, check_names=False)

            # do not load index
            tsframe.to_csv(path)
            recons = DataFrame.from_csv(path, index_col=None)
            self.assertEqual(len(recons.columns), len(tsframe.columns) + 2)

            # no index
            tsframe.to_csv(path, index=False)
            recons = DataFrame.from_csv(path, index_col=None)
            assert_almost_equal(recons.values, self.tsframe.values)

            # needed if setUp becomes classmethod
            self.tsframe.index = old_index

        with ensure_clean('__tmp_to_csv_multiindex__') as path:
            # GH3571, GH1651, GH3141

            def _make_frame(names=None):
                # small int64 frame with a 2-level column MultiIndex;
                # names=True attaches the default level names
                if names is True:
                    names = ['first', 'second']
                return DataFrame(np.random.randint(0, 10, size=(3, 3)),
                                 columns=MultiIndex.from_tuples(
                                     [('bah', 'foo'), ('bah', 'bar'),
                                      ('ban', 'baz')],
                                     names=names),
                                 dtype='int64')

            # column & index are multi-index
            df = mkdf(5, 3, r_idx_nlevels=2, c_idx_nlevels=4)
            df.to_csv(path, tupleize_cols=False)
            result = read_csv(path,
                              header=[0, 1, 2, 3],
                              index_col=[0, 1],
                              tupleize_cols=False)
            assert_frame_equal(df, result)

            # column is mi
            df = mkdf(5, 3, r_idx_nlevels=1, c_idx_nlevels=4)
            df.to_csv(path, tupleize_cols=False)
            result = read_csv(path,
                              header=[0, 1, 2, 3],
                              index_col=0,
                              tupleize_cols=False)
            assert_frame_equal(df, result)

            # dup column names?
            df = mkdf(5, 3, r_idx_nlevels=3, c_idx_nlevels=4)
            df.to_csv(path, tupleize_cols=False)
            result = read_csv(path,
                              header=[0, 1, 2, 3],
                              index_col=[0, 1, 2],
                              tupleize_cols=False)
            assert_frame_equal(df, result)

            # writing with no index
            df = _make_frame()
            df.to_csv(path, tupleize_cols=False, index=False)
            result = read_csv(path, header=[0, 1], tupleize_cols=False)
            assert_frame_equal(df, result)

            # we lose the names here
            df = _make_frame(True)
            df.to_csv(path, tupleize_cols=False, index=False)
            result = read_csv(path, header=[0, 1], tupleize_cols=False)
            self.assertTrue(all([x is None for x in result.columns.names]))
            result.columns.names = df.columns.names
            assert_frame_equal(df, result)

            # tupleize_cols=True and index=False
            df = _make_frame(True)
            df.to_csv(path, tupleize_cols=True, index=False)
            result = read_csv(path,
                              header=0,
                              tupleize_cols=True,
                              index_col=None)
            result.columns = df.columns
            assert_frame_equal(df, result)

            # whatsnew example
            df = _make_frame()
            df.to_csv(path, tupleize_cols=False)
            result = read_csv(path,
                              header=[0, 1],
                              index_col=[0],
                              tupleize_cols=False)
            assert_frame_equal(df, result)

            df = _make_frame(True)
            df.to_csv(path, tupleize_cols=False)
            result = read_csv(path,
                              header=[0, 1],
                              index_col=[0],
                              tupleize_cols=False)
            assert_frame_equal(df, result)

            # column & index are multi-index (compatibility)
            df = mkdf(5, 3, r_idx_nlevels=2, c_idx_nlevels=4)
            df.to_csv(path, tupleize_cols=True)
            result = read_csv(path,
                              header=0,
                              index_col=[0, 1],
                              tupleize_cols=True)
            result.columns = df.columns
            assert_frame_equal(df, result)

            # invalid options
            df = _make_frame(True)
            df.to_csv(path, tupleize_cols=False)

            # catch invalid headers (raw strings: '\[' is an invalid
            # escape sequence in a regular string literal)
            with assertRaisesRegexp(
                    CParserError, r'Passed header=\[0,1,2\] are too many '
                    r'rows for this multi_index of columns'):
                read_csv(path,
                         tupleize_cols=False,
                         header=lrange(3),
                         index_col=0)

            with assertRaisesRegexp(
                    CParserError, r'Passed header=\[0,1,2,3,4,5,6\], len of '
                    r'7, but only 6 lines in file'):
                read_csv(path,
                         tupleize_cols=False,
                         header=lrange(7),
                         index_col=0)

            for i in [4, 5, 6]:
                with tm.assertRaises(CParserError):
                    read_csv(path,
                             tupleize_cols=False,
                             header=lrange(i),
                             index_col=0)

            # write with cols
            with assertRaisesRegexp(TypeError, 'cannot specify cols with a '
                                    'MultiIndex'):
                df.to_csv(path, tupleize_cols=False, columns=['foo', 'bar'])

        with ensure_clean('__tmp_to_csv_multiindex__') as path:
            # empty frame round-trips with columns intact and zero rows
            tsframe[:0].to_csv(path)
            recons = DataFrame.from_csv(path)
            exp = tsframe[:0]
            exp.index = []

            self.assert_index_equal(recons.columns, exp.columns)
            self.assertEqual(len(recons), 0)
# Example #60
    def test_to_csv_multiindex(self):
        """
        Round-trip frames with MultiIndex rows and/or columns through
        to_csv/read_csv: plain round trips, index_label handling,
        deprecated tupleize_cols compatibility, invalid header errors,
        and the empty-frame case.
        """
        frame = self.frame
        old_index = frame.index
        arrays = np.arange(len(old_index) * 2).reshape(2, -1)
        new_index = MultiIndex.from_arrays(arrays, names=['first', 'second'])
        frame.index = new_index

        with ensure_clean('__tmp_to_csv_multiindex__') as path:

            frame.to_csv(path, header=False)
            frame.to_csv(path, columns=['A', 'B'])

            # round trip
            frame.to_csv(path)

            df = self.read_csv(path, index_col=[0, 1],
                               parse_dates=False)

            # TODO to_csv drops column name
            assert_frame_equal(frame, df, check_names=False)
            assert frame.index.names == df.index.names

            # needed if setUp becomes a class method
            self.frame.index = old_index

            # try multiindex with dates
            tsframe = self.tsframe
            old_index = tsframe.index
            new_index = [old_index, np.arange(len(old_index))]
            tsframe.index = MultiIndex.from_arrays(new_index)

            tsframe.to_csv(path, index_label=['time', 'foo'])
            recons = self.read_csv(path, index_col=[0, 1])

            # TODO to_csv drops column name
            assert_frame_equal(tsframe, recons, check_names=False)

            # do not load index
            tsframe.to_csv(path)
            recons = self.read_csv(path, index_col=None)
            assert len(recons.columns) == len(tsframe.columns) + 2

            # no index
            tsframe.to_csv(path, index=False)
            recons = self.read_csv(path, index_col=None)
            assert_almost_equal(recons.values, self.tsframe.values)

            # needed if setUp becomes class method
            self.tsframe.index = old_index

        with ensure_clean('__tmp_to_csv_multiindex__') as path:
            # GH3571, GH1651, GH3141

            def _make_frame(names=None):
                # small int64 frame with a 2-level column MultiIndex;
                # names=True attaches the default level names
                if names is True:
                    names = ['first', 'second']
                return DataFrame(np.random.randint(0, 10, size=(3, 3)),
                                 columns=MultiIndex.from_tuples(
                                     [('bah', 'foo'),
                                      ('bah', 'bar'),
                                      ('ban', 'baz')], names=names),
                                 dtype='int64')

            # column & index are multi-index
            df = mkdf(5, 3, r_idx_nlevels=2, c_idx_nlevels=4)
            df.to_csv(path)
            result = read_csv(path, header=[0, 1, 2, 3],
                              index_col=[0, 1])
            assert_frame_equal(df, result)

            # column is mi
            df = mkdf(5, 3, r_idx_nlevels=1, c_idx_nlevels=4)
            df.to_csv(path)
            result = read_csv(
                path, header=[0, 1, 2, 3], index_col=0)
            assert_frame_equal(df, result)

            # dup column names?
            df = mkdf(5, 3, r_idx_nlevels=3, c_idx_nlevels=4)
            df.to_csv(path)
            result = read_csv(path, header=[0, 1, 2, 3],
                              index_col=[0, 1, 2])
            assert_frame_equal(df, result)

            # writing with no index
            df = _make_frame()
            df.to_csv(path, index=False)
            result = read_csv(path, header=[0, 1])
            assert_frame_equal(df, result)

            # we lose the names here
            df = _make_frame(True)
            df.to_csv(path, index=False)
            result = read_csv(path, header=[0, 1])
            assert com._all_none(*result.columns.names)
            result.columns.names = df.columns.names
            assert_frame_equal(df, result)

            # tupleize_cols=True and index=False
            # (deprecated option, hence the FutureWarning contexts)
            df = _make_frame(True)
            with tm.assert_produces_warning(FutureWarning):
                df.to_csv(path, tupleize_cols=True, index=False)

            with tm.assert_produces_warning(FutureWarning,
                                            check_stacklevel=False):
                result = read_csv(path, header=0,
                                  tupleize_cols=True,
                                  index_col=None)
            result.columns = df.columns
            assert_frame_equal(df, result)

            # whatsnew example
            df = _make_frame()
            df.to_csv(path)
            result = read_csv(path, header=[0, 1],
                              index_col=[0])
            assert_frame_equal(df, result)

            df = _make_frame(True)
            df.to_csv(path)
            result = read_csv(path, header=[0, 1],
                              index_col=[0])
            assert_frame_equal(df, result)

            # column & index are multi-index (compatibility)
            df = mkdf(5, 3, r_idx_nlevels=2, c_idx_nlevels=4)
            with tm.assert_produces_warning(FutureWarning):
                df.to_csv(path, tupleize_cols=True)

            with tm.assert_produces_warning(FutureWarning,
                                            check_stacklevel=False):
                result = read_csv(path, header=0, index_col=[0, 1],
                                  tupleize_cols=True)
            result.columns = df.columns
            assert_frame_equal(df, result)

            # invalid options
            df = _make_frame(True)
            df.to_csv(path)

            # too many header rows must raise a ParserError
            for i in [6, 7]:
                msg = 'len of {i}, but only 5 lines in file'.format(i=i)
                with tm.assert_raises_regex(ParserError, msg):
                    read_csv(path, header=lrange(i), index_col=0)

            # write with cols
            with tm.assert_raises_regex(TypeError, 'cannot specify cols '
                                        'with a MultiIndex'):
                df.to_csv(path, columns=['foo', 'bar'])

        with ensure_clean('__tmp_to_csv_multiindex__') as path:
            # empty frame round-trips with columns intact and zero rows
            tsframe[:0].to_csv(path)
            recons = self.read_csv(path)

            exp = tsframe[:0]
            exp.index = []

            tm.assert_index_equal(recons.columns, exp.columns)
            assert len(recons) == 0