Пример #1
0
    def _check_excel_multiindex_dates(self, ext):
        """Round-trip ``self.tsframe`` with a two-level index through Excel.

        The frame's index is temporarily replaced by a MultiIndex made of the
        original index plus an integer range.  The frame is written to a
        temporary file and read back twice: once with explicit index labels
        and once letting the reader infer the index.

        ext : file extension (without the dot) of the temporary file
        """
        path = '__tmp_to_excel_multiindex_dates__' + ext + '__.' + ext

        # try multiindex with dates
        tsframe = self.tsframe
        old_index = tsframe.index
        new_index = [old_index, np.arange(len(old_index))]
        tsframe.index = MultiIndex.from_arrays(new_index)

        try:
            tsframe.to_excel(path, 'test1', index_label=['time', 'foo'])
            reader = ExcelFile(path)
            recons = reader.parse('test1', index_col=[0, 1])

            tm.assert_frame_equal(tsframe, recons, check_names=False)
            self.assertEqual(recons.index.names, ['time', 'foo'])

            # infer index
            tsframe.to_excel(path, 'test1')
            reader = ExcelFile(path)
            recons = reader.parse('test1')
            tm.assert_frame_equal(tsframe, recons)
        finally:
            # Undo the fixture mutation and drop the temp file even when an
            # assertion above fails.
            self.tsframe.index = old_index  # needed if setUP becomes classmethod
            if os.path.exists(path):
                os.remove(path)
Пример #2
0
    def test_excel_stop_iterator(self):
        """Sheet1 of ``test2.xls`` must parse to one row of two strings."""
        _skip_if_no_xlrd()

        xls_path = os.path.join(self.dirpath, 'test2.xls')
        workbook = ExcelFile(xls_path)
        result = workbook.parse('Sheet1')

        wanted = DataFrame([['aaaa', 'bbbbb']], columns=['Test', 'Test1'])
        tm.assert_frame_equal(result, wanted)
Пример #3
0
    def _check_extension_sheets(self, ext):
        """Write two frames to separate sheets of one workbook and read back.

        ext : file extension (without the dot) of the temporary workbook
        """
        path = '__tmp_to_excel_from_excel_sheets__.' + ext

        self.frame['A'][:5] = nan

        try:
            # Smoke-test the basic writer options first.
            self.frame.to_excel(path, 'test1')
            self.frame.to_excel(path, 'test1', cols=['A', 'B'])
            self.frame.to_excel(path, 'test1', header=False)
            self.frame.to_excel(path, 'test1', index=False)

            # Test writing to separate sheets
            writer = ExcelWriter(path)
            self.frame.to_excel(writer, 'test1')
            self.tsframe.to_excel(writer, 'test2')
            writer.save()
            reader = ExcelFile(path)
            recons = reader.parse('test1', index_col=0)
            tm.assert_frame_equal(self.frame, recons)
            recons = reader.parse('test2', index_col=0)
            tm.assert_frame_equal(self.tsframe, recons)
            np.testing.assert_equal(2, len(reader.sheet_names))
            np.testing.assert_equal('test1', reader.sheet_names[0])
            np.testing.assert_equal('test2', reader.sheet_names[1])
        finally:
            # Remove the temp file even when a check above fails.
            if os.path.exists(path):
                os.remove(path)
Пример #4
0
    def test_excel_stop_iterator(self):
        """Parse Sheet1 of ``test2.xls`` and compare with the known row."""
        _skip_if_no_xlrd()

        source = os.path.join(self.dirpath, "test2.xls")
        actual = ExcelFile(source).parse("Sheet1")

        reference = DataFrame([["aaaa", "bbbbb"]], columns=["Test", "Test1"])
        tm.assert_frame_equal(actual, reference)
Пример #5
0
    def _check_extension_sheets(self, ext):
        """Check that two frames written to separate sheets round-trip.

        ext : file extension (without the dot) of the temporary workbook
        """
        path = '__tmp_to_excel_from_excel_sheets__.' + ext

        self.frame['A'][:5] = nan

        try:
            # Exercise the basic writer options before the real check.
            self.frame.to_excel(path, 'test1')
            self.frame.to_excel(path, 'test1', cols=['A', 'B'])
            self.frame.to_excel(path, 'test1', header=False)
            self.frame.to_excel(path, 'test1', index=False)

            # Test writing to separate sheets
            writer = ExcelWriter(path)
            self.frame.to_excel(writer, 'test1')
            self.tsframe.to_excel(writer, 'test2')
            writer.save()
            reader = ExcelFile(path)
            recons = reader.parse('test1', index_col=0)
            tm.assert_frame_equal(self.frame, recons)
            recons = reader.parse('test2', index_col=0)
            tm.assert_frame_equal(self.tsframe, recons)
            np.testing.assert_equal(2, len(reader.sheet_names))
            np.testing.assert_equal('test1', reader.sheet_names[0])
            np.testing.assert_equal('test2', reader.sheet_names[1])
        finally:
            # A failing assertion must not leave the temp file on disk.
            if os.path.exists(path):
                os.remove(path)
Пример #6
0
    def test_excel_cell_error_na(self):
        """Sheet1 of ``test3.xls`` must parse to a single NaN under 'Test'."""
        _skip_if_no_xlrd()

        xls_path = os.path.join(self.dirpath, "test3.xls")
        result = ExcelFile(xls_path).parse("Sheet1")

        wanted = DataFrame([[np.nan]], columns=["Test"])
        tm.assert_frame_equal(result, wanted)
Пример #7
0
    def test_excel_cell_error_na(self):
        """Parse Sheet1 of ``test3.xls``; expect one NaN in column 'Test'."""
        _skip_if_no_xlrd()

        workbook = ExcelFile(os.path.join(self.dirpath, 'test3.xls'))
        actual = workbook.parse('Sheet1')

        reference = DataFrame([[np.nan]], columns=['Test'])
        tm.assert_frame_equal(actual, reference)
Пример #8
0
    def test_to_excel_unicode_filename(self):
        """Write and re-read a frame using a non-ASCII file name.

        Runs for both ``xls`` and ``xlsx``.  The test is skipped when the
        file name cannot be encoded on this system.
        """
        _skip_if_no_excelsuite()

        for ext in ["xls", "xlsx"]:
            filename = u"\u0192u." + ext

            # Probe whether the file system accepts the unicode name.
            try:
                f = open(filename, "wb")
            except UnicodeEncodeError:
                raise nose.SkipTest("no unicode file names on this system")
            else:
                f.close()

            try:
                df = DataFrame(
                    [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]],
                    index=["A", "B"],
                    columns=["X", "Y", "Z"],
                )
                df.to_excel(filename, "test1", float_format="%.2f")

                reader = ExcelFile(filename)
                rs = reader.parse("test1", index_col=None)
                xp = DataFrame(
                    [[0.12, 0.23, 0.57], [12.32, 123123.20, 321321.20]], index=["A", "B"], columns=["X", "Y", "Z"]
                )
                tm.assert_frame_equal(rs, xp)
            finally:
                # Remove the file even when the round trip fails.
                if os.path.exists(filename):
                    os.remove(filename)
    def test_to_excel_unicode_filename(self):
        """Round-trip a small float frame through a non-ASCII file name.

        Runs for both ``xls`` and ``xlsx``; skipped when the file name cannot
        be encoded on this system.
        """
        _skip_if_no_excelsuite()

        axes = dict(index=['A', 'B'], columns=['X', 'Y', 'Z'])

        for ext in ['xls', 'xlsx']:
            filename = '\u0192u.' + ext

            # Probe whether the file can be created under that name.
            try:
                handle = open(filename, 'wb')
            except UnicodeEncodeError:
                raise nose.SkipTest('no unicode file names on this system')
            else:
                handle.close()

            raw_rows = [[0.123456, 0.234567, 0.567567],
                        [12.32112, 123123.2, 321321.2]]
            df = DataFrame(raw_rows, **axes)

            with ensure_clean(filename) as filename:
                df.to_excel(filename, 'test1', float_format='%.2f')

                rs = ExcelFile(filename).parse('test1', index_col=None)
                rounded_rows = [[0.12, 0.23, 0.57],
                                [12.32, 123123.20, 321321.20]]
                xp = DataFrame(rounded_rows, **axes)
                tm.assert_frame_equal(rs, xp)
Пример #10
0
    def _check_extension_sheets(self, ext):
        """Verify that frames written to two sheets of one workbook round-trip.

        ext : file extension (without the dot) of the temporary workbook
        """
        path = "__tmp_to_excel_from_excel_sheets__." + ext

        self.frame["A"][:5] = nan

        try:
            # Smoke-test the basic writer options first.
            self.frame.to_excel(path, "test1")
            self.frame.to_excel(path, "test1", cols=["A", "B"])
            self.frame.to_excel(path, "test1", header=False)
            self.frame.to_excel(path, "test1", index=False)

            # Test writing to separate sheets
            writer = ExcelWriter(path)
            self.frame.to_excel(writer, "test1")
            self.tsframe.to_excel(writer, "test2")
            writer.save()
            reader = ExcelFile(path)
            recons = reader.parse("test1", index_col=0)
            tm.assert_frame_equal(self.frame, recons)
            recons = reader.parse("test2", index_col=0)
            tm.assert_frame_equal(self.tsframe, recons)
            np.testing.assert_equal(2, len(reader.sheet_names))
            np.testing.assert_equal("test1", reader.sheet_names[0])
            np.testing.assert_equal("test2", reader.sheet_names[1])
        finally:
            # Clean up the temp file whether or not the checks passed.
            if os.path.exists(path):
                os.remove(path)
Пример #11
0
    def test_excel_stop_iterator(self):
        """Sheet1 of ``test2.xls`` must equal the known one-row frame."""
        _skip_if_no_xlrd()

        location = os.path.join(self.dirpath, 'test2.xls')
        got = ExcelFile(location).parse('Sheet1')

        want = DataFrame([['aaaa', 'bbbbb']], columns=['Test', 'Test1'])
        assert_frame_equal(got, want)
Пример #12
0
    def test_excel_cell_error_na(self):
        """Sheet1 of ``test3.xls`` must equal a frame holding a single NaN."""
        _skip_if_no_xlrd()

        location = os.path.join(self.dirpath, 'test3.xls')
        got = ExcelFile(location).parse('Sheet1')

        want = DataFrame([[np.nan]], columns=['Test'])
        assert_frame_equal(got, want)
Пример #13
0
def getExcelChunck(file, ws, drange, rid=-1, cid=-1):
    """Return a rectangular chunk of an Excel worksheet as a DataFrame.

    file : handed unchanged to ``ExcelFile``
    ws : name of the worksheet to parse
    drange : two cell references written back to back, e.g. ``'B4H4'``
        (column letters followed by a row number, twice)
    rid : spreadsheet row number that holds the column labels; ignored
        when negative
    cid : column reference converted with ``sord`` and used for the row
        labels; ignored when negative

    Raises ValueError when ``drange`` contains no recognisable cell range.
    """
    def _label(value):
        # Numeric headers come back as floats ('2010.0'); drop only a
        # trailing '.0' so labels such as '10.05' are left intact.
        return re.sub(r'\.0$', '', str(value).strip())

    xls = ExcelFile(file)
    df = xls.parse(ws)

    # get the range from expression
    # B4H4
    m = re.search('([A-Z]+)([0-9]+)([A-Z]+)([0-9]+)', drange)
    if m is None:
        raise ValueError('cannot parse cell range: %r' % (drange,))
    c1 = sord(m.group(1))
    c2 = sord(m.group(3)) + 1
    # NOTE(review): the -2 offset presumably accounts for the header row
    # plus 1-based spreadsheet numbering -- confirm against callers.
    r1 = int(m.group(2)) - 2
    r2 = int(m.group(4)) - 2

    df2 = df.ix[r1:r2, c1:c2]

    if rid >= 0:
        rh = int(rid) - 2
        df2.columns = df.ix[rh, c1:c2]
        df2.columns = df2.columns.map(_label)

    if cid >= 0:
        ch = sord(cid)
        df2.index = df.ix[r1:r2, ch]
        df2.index = df2.index.map(_label)
    return df2
Пример #14
0
    def test_to_excel_unicode_filename(self):
        """Write and re-read a frame using a non-ASCII file name.

        Runs for both ``xls`` and ``xlsx``.  The test is skipped when the
        file name cannot be encoded on this system.
        """
        _skip_if_no_excelsuite()

        for ext in ['xls', 'xlsx']:
            filename = u'\u0192u.' + ext

            # Probe whether the file system accepts the unicode name.
            try:
                f = open(filename, 'wb')
            except UnicodeEncodeError:
                raise nose.SkipTest('no unicode file names on this system')
            else:
                f.close()

            try:
                df = DataFrame([[0.123456, 0.234567, 0.567567],
                                [12.32112, 123123.2, 321321.2]],
                               index=['A', 'B'],
                               columns=['X', 'Y', 'Z'])
                df.to_excel(filename, 'test1', float_format='%.2f')

                reader = ExcelFile(filename)
                rs = reader.parse('test1', index_col=None)
                xp = DataFrame([[0.12, 0.23, 0.57], [12.32, 123123.20, 321321.20]],
                               index=['A', 'B'],
                               columns=['X', 'Y', 'Z'])
                tm.assert_frame_equal(rs, xp)
            finally:
                # Remove the file even when the round trip fails.
                if os.path.exists(filename):
                    os.remove(filename)
    def _check_extension_mixed(self, ext):
        """Round-trip ``self.mixed_frame`` through a temporary ``ext`` file."""
        tmp_name = '__tmp_to_excel_from_excel_mixed__.' + ext

        with ensure_clean(tmp_name) as path:
            self.mixed_frame.to_excel(path, 'test1')
            roundtripped = ExcelFile(path).parse('test1', index_col=0)
            tm.assert_frame_equal(self.mixed_frame, roundtripped)
Пример #16
0
 def test_excel_table(self):
     """Both sheets of ``test.xls`` must match the frame read from csv1."""
     workbook = ExcelFile(os.path.join(self.dirpath, 'test.xls'))

     sheet1 = workbook.parse('Sheet1')
     reference = read_csv(self.csv1)
     # Sheet2 is read with its second row (index 1) skipped.
     sheet2 = workbook.parse('Sheet2', skiprows=[1])

     assert_frame_equal(sheet1, reference)
     assert_frame_equal(sheet2, reference)
Пример #17
0
    def _check_extension_mixed(self, ext):
        """Round-trip ``self.mixed_frame`` through a temporary Excel file.

        ext : file extension (without the dot) of the temporary file
        """
        path = '__tmp_to_excel_from_excel_mixed__.' + ext

        try:
            self.mixed_frame.to_excel(path, 'test1')
            reader = ExcelFile(path)
            recons = reader.parse('test1', index_col=0, has_index_names=True)
            tm.assert_frame_equal(self.mixed_frame, recons)
        finally:
            # Do not leave the temp file behind when the comparison fails.
            if os.path.exists(path):
                os.remove(path)
Пример #18
0
    def _check_extension_mixed(self, ext):
        """Write ``self.mixed_frame`` to Excel and check it reads back equal.

        ext : file extension (without the dot) of the temporary file
        """
        path = '__tmp_to_excel_from_excel_mixed__.' + ext

        try:
            self.mixed_frame.to_excel(path, 'test1')
            reader = ExcelFile(path)
            recons = reader.parse('test1', index_col=0, has_index_names=True)
            tm.assert_frame_equal(self.mixed_frame, recons)
        finally:
            # Clean up regardless of the test outcome.
            if os.path.exists(path):
                os.remove(path)
Пример #19
0
    def _check_extension_mixed(self, ext):
        """Round-trip ``self.mixed_frame`` through a temporary Excel file.

        ext : file extension (without the dot) of the temporary file
        """
        path = "__tmp_to_excel_from_excel_mixed__." + ext

        try:
            self.mixed_frame.to_excel(path, "test1")
            reader = ExcelFile(path)
            recons = reader.parse("test1", index_col=0)
            tm.assert_frame_equal(self.mixed_frame, recons)
        finally:
            # Remove the temp file even if the frames differ.
            if os.path.exists(path):
                os.remove(path)
Пример #20
0
    def test_excel_cell_error_na(self):
        """Sheet1 of ``test3.xls`` must parse to a single NaN under 'Test'."""
        # The import only probes for xlrd; the name itself is not used.
        try:
            import xlrd
        except ImportError:
            raise nose.SkipTest('xlrd not installed, skipping')

        xls_path = os.path.join(self.dirpath, 'test3.xls')
        result = ExcelFile(xls_path).parse('Sheet1')

        wanted = DataFrame([[np.nan]], columns=['Test'])
        assert_frame_equal(result, wanted)
    def _check_extension_tsframe(self, ext):
        """Round-trip the first five rows of a generated time frame."""
        tmp_name = '__tmp_to_excel_from_excel_tsframe__.' + ext

        sample = tm.makeTimeDataFrame()[:5]

        with ensure_clean(tmp_name) as path:
            sample.to_excel(path, 'test1')
            roundtripped = ExcelFile(path).parse('test1')
            tm.assert_frame_equal(sample, roundtripped)
Пример #22
0
    def test_xlsx_table(self):
        """Both sheets of ``test.xlsx`` must match the frame read from csv1."""
        _skip_if_no_openpyxl()

        workbook = ExcelFile(os.path.join(self.dirpath, 'test.xlsx'))
        read_opts = dict(index_col=0, parse_dates=True)

        sheet1 = workbook.parse('Sheet1', **read_opts)
        reference = read_csv(self.csv1, **read_opts)
        # Sheet2 is read with its second row (index 1) skipped.
        sheet2 = workbook.parse('Sheet2', skiprows=[1], **read_opts)

        assert_frame_equal(sheet1, reference)
        assert_frame_equal(sheet2, reference)
Пример #23
0
    def test_excel_stop_iterator(self):
        """Sheet1 of ``test2.xls`` must parse to one row of two strings."""
        # The import only probes for xlrd; the name itself is not used.
        try:
            import xlrd
        except ImportError:
            raise nose.SkipTest('xlrd not installed, skipping')

        xls_path = os.path.join(self.dirpath, 'test2.xls')
        result = ExcelFile(xls_path).parse('Sheet1')

        wanted = DataFrame([['aaaa', 'bbbbb']], columns=['Test', 'Test1'])
        assert_frame_equal(result, wanted)
Пример #24
0
    def test_xlsx_table(self):
        """Compare Sheet1 and Sheet2 of ``test.xlsx`` with the csv1 frame."""
        _skip_if_no_openpyxl()

        book = ExcelFile(os.path.join(self.dirpath, 'test.xlsx'))
        common = {'index_col': 0, 'parse_dates': True}

        first = book.parse('Sheet1', **common)
        expected = read_csv(self.csv1, **common)
        # Sheet2 is read with its second row (index 1) skipped.
        second = book.parse('Sheet2', skiprows=[1], **common)

        assert_frame_equal(first, expected)
        assert_frame_equal(second, expected)
Пример #25
0
    def test_excel_stop_iterator(self):
        """Parse Sheet1 of ``test2.xls`` and compare with the known row."""
        # Availability probe only; skip the test when xlrd is missing.
        try:
            import xlrd
        except ImportError:
            raise nose.SkipTest('xlrd not installed, skipping')

        workbook = ExcelFile(os.path.join(self.dirpath, 'test2.xls'))
        actual = workbook.parse('Sheet1')

        reference = DataFrame([['aaaa', 'bbbbb']], columns=['Test', 'Test1'])
        assert_frame_equal(actual, reference)
Пример #26
0
    def test_excel_stop_iterator(self):
        """Sheet1 of ``test2.xls`` must equal the known one-row frame."""
        # Availability probe only; skip the test when xlrd is missing.
        try:
            import xlrd
        except ImportError:
            raise nose.SkipTest("xlrd not installed, skipping")

        location = os.path.join(self.dirpath, "test2.xls")
        got = ExcelFile(location).parse("Sheet1")

        want = DataFrame([["aaaa", "bbbbb"]], columns=["Test", "Test1"])
        assert_frame_equal(got, want)
Пример #27
0
    def test_excel_cell_error_na(self):
        """Parse Sheet1 of ``test3.xls``; expect one NaN in column 'Test'."""
        # Availability probe only; skip the test when xlrd is missing.
        try:
            import xlrd
        except ImportError:
            raise nose.SkipTest('xlrd not installed, skipping')

        workbook = ExcelFile(os.path.join(self.dirpath, 'test3.xls'))
        actual = workbook.parse('Sheet1')

        reference = DataFrame([[np.nan]], columns=['Test'])
        assert_frame_equal(actual, reference)
Пример #28
0
    def _check_extension(self, ext):
        """Round-trip ``self.frame`` through Excel under several options.

        Covers the default write, a write without the index, and a write
        that represents missing values as ``'NA'``.

        ext : file extension (without the dot) of the temporary file
        """
        path = '__tmp_to_excel_from_excel__.' + ext

        self.frame['A'][:5] = nan

        try:
            # Smoke-test the basic writer options first.
            self.frame.to_excel(path, 'test1')
            self.frame.to_excel(path, 'test1', cols=['A', 'B'])
            self.frame.to_excel(path, 'test1', header=False)
            self.frame.to_excel(path, 'test1', index=False)

            # test roundtrip
            self.frame.to_excel(path, 'test1')
            reader = ExcelFile(path)
            recons = reader.parse('test1', index_col=0)
            tm.assert_frame_equal(self.frame, recons)

            # Without a written index, restore it before comparing.
            self.frame.to_excel(path, 'test1', index=False)
            reader = ExcelFile(path)
            recons = reader.parse('test1', index_col=None)
            recons.index = self.frame.index
            tm.assert_frame_equal(self.frame, recons)

            self.frame.to_excel(path, 'test1', na_rep='NA')
            reader = ExcelFile(path)
            recons = reader.parse('test1', index_col=0, na_values=['NA'])
            tm.assert_frame_equal(self.frame, recons)
        finally:
            # Remove the temp file even when a comparison fails.
            if os.path.exists(path):
                os.remove(path)
Пример #29
0
    def _check_extension_tsframe(self, ext):
        """Round-trip the first five rows of a generated time frame.

        ext : file extension (without the dot) of the temporary file
        """
        path = "__tmp_to_excel_from_excel_tsframe__." + ext

        df = tm.makeTimeDataFrame()[:5]

        try:
            df.to_excel(path, "test1")
            reader = ExcelFile(path)
            recons = reader.parse("test1")
            tm.assert_frame_equal(df, recons)
        finally:
            # Remove the temp file even when the comparison fails.
            if os.path.exists(path):
                os.remove(path)
Пример #30
0
    def test_excel_roundtrip_bool(self):
        """Round-trip a random boolean frame through an xlsx file."""
        _skip_if_no_openpyxl()

        # Test roundtrip np.bool8, does not seem to work for xls
        path = '__tmp_excel_roundtrip_bool__.xlsx'
        frame = (DataFrame(np.random.randn(10, 2)) >= 0)
        try:
            frame.to_excel(path, 'test1')
            reader = ExcelFile(path)
            recons = reader.parse('test1')
            tm.assert_frame_equal(frame, recons)
        finally:
            # Remove the temp file even when the comparison fails.
            if os.path.exists(path):
                os.remove(path)
Пример #31
0
    def test_excel_roundtrip_bool(self):
        """Write a random boolean frame to xlsx and check it reads back."""
        _skip_if_no_openpyxl()

        # Test roundtrip np.bool8, does not seem to work for xls
        path = "__tmp_excel_roundtrip_bool__.xlsx"
        frame = DataFrame(np.random.randn(10, 2)) >= 0
        try:
            frame.to_excel(path, "test1")
            reader = ExcelFile(path)
            recons = reader.parse("test1")
            tm.assert_frame_equal(frame, recons)
        finally:
            # Clean up regardless of the test outcome.
            if os.path.exists(path):
                os.remove(path)
Пример #32
0
    def test_to_excel_periodindex(self):
        """Round-trip a monthly period-resampled frame through xls and xlsx."""
        _skip_if_no_excelsuite()
        for ext in ['xls', 'xlsx']:
            path = '__tmp_to_excel_periodindex__.' + ext
            frame = self.tsframe
            xp = frame.resample('M', kind='period')
            try:
                xp.to_excel(path, 'sht1')

                reader = ExcelFile(path)
                rs = reader.parse('sht1', index_col=0, parse_dates=True)
                # The parsed frame is converted back to periods to compare.
                tm.assert_frame_equal(xp, rs.to_period('M'))
            finally:
                # Remove the temp file even when the comparison fails.
                if os.path.exists(path):
                    os.remove(path)
Пример #33
0
 def test_excel_roundtrip_datetime(self):
     """Write a frame indexed by ``datetime.date`` objects and read it back.

     The result is compared against the original ``self.tsframe``.
     """
     _skip_if_no_xlrd()
     _skip_if_no_xlwt()
     # datetime.date, not sure what to test here exactly
     path = '__tmp_excel_roundtrip_datetime__.xls'
     tsf = self.tsframe.copy()
     tsf.index = [x.date() for x in self.tsframe.index]
     try:
         tsf.to_excel(path, 'test1')
         reader = ExcelFile(path)
         recons = reader.parse('test1')
         tm.assert_frame_equal(self.tsframe, recons)
     finally:
         # Remove the temp file even when the comparison fails.
         if os.path.exists(path):
             os.remove(path)
Пример #34
0
 def test_excel_roundtrip_datetime(self):
     """Round-trip a frame whose index holds ``datetime.date`` objects.

     The result is compared against the original ``self.tsframe``.
     """
     _skip_if_no_xlrd()
     _skip_if_no_xlwt()
     # datetime.date, not sure what to test here exactly
     path = "__tmp_excel_roundtrip_datetime__.xls"
     tsf = self.tsframe.copy()
     tsf.index = [x.date() for x in self.tsframe.index]
     try:
         tsf.to_excel(path, "test1")
         reader = ExcelFile(path)
         recons = reader.parse("test1")
         tm.assert_frame_equal(self.tsframe, recons)
     finally:
         # Clean up regardless of the test outcome.
         if os.path.exists(path):
             os.remove(path)
Пример #35
0
    def test_excel_roundtrip_bool(self):
        """Round-trip a random boolean frame through an xlsx file."""
        _skip_if_no_openpyxl()

        # Test roundtrip np.bool8, does not seem to work for xls
        tmp_name = '__tmp_excel_roundtrip_bool__.xlsx'
        flags = DataFrame(np.random.randn(10, 2)) >= 0

        with ensure_clean(tmp_name) as path:
            flags.to_excel(path, 'test1')
            roundtripped = ExcelFile(path).parse('test1')
            tm.assert_frame_equal(flags, roundtripped)
Пример #36
0
    def test_to_excel_periodindex(self):
        """Write a monthly period-resampled frame to Excel and read it back.

        Runs for both ``xls`` and ``xlsx``.
        """
        _skip_if_no_excelsuite()
        for ext in ['xls', 'xlsx']:
            path = '__tmp_to_excel_periodindex__.' + ext
            frame = self.tsframe
            xp = frame.resample('M', kind='period')
            try:
                xp.to_excel(path, 'sht1')

                reader = ExcelFile(path)
                rs = reader.parse('sht1', index_col=0, parse_dates=True)
                # The parsed frame is converted back to periods to compare.
                tm.assert_frame_equal(xp, rs.to_period('M'))
            finally:
                # Clean up regardless of the test outcome.
                if os.path.exists(path):
                    os.remove(path)
Пример #37
0
    def test_to_excel_periodindex(self):
        """Round-trip a monthly period-resampled frame through xls and xlsx."""
        _skip_if_no_excelsuite()
        for ext in ["xls", "xlsx"]:
            path = "__tmp_to_excel_periodindex__." + ext
            frame = self.tsframe
            xp = frame.resample("M", kind="period")
            try:
                xp.to_excel(path, "sht1")

                reader = ExcelFile(path)
                rs = reader.parse("sht1", index_col=0, parse_dates=True)
                # The parsed frame is converted back to periods to compare.
                tm.assert_frame_equal(xp, rs.to_period("M"))
            finally:
                # Do not leave the temp file behind on failure.
                if os.path.exists(path):
                    os.remove(path)
Пример #38
0
    def test_excel_table(self):
        """Both sheets of ``test.xls`` must match the frame read from csv1."""
        # Availability probe only; skip the test when xlrd is missing.
        try:
            import xlrd
        except ImportError:
            raise nose.SkipTest("xlrd not installed, skipping")

        workbook = ExcelFile(os.path.join(self.dirpath, "test.xls"))
        read_opts = dict(index_col=0, parse_dates=True)

        sheet1 = workbook.parse("Sheet1", **read_opts)
        reference = read_csv(self.csv1, **read_opts)
        # Sheet2 is read with its second row (index 1) skipped.
        sheet2 = workbook.parse("Sheet2", skiprows=[1], **read_opts)

        assert_frame_equal(sheet1, reference)
        assert_frame_equal(sheet2, reference)
Пример #39
0
    def test_excel_table(self):
        """Compare Sheet1 and Sheet2 of ``test.xls`` with the csv1 frame."""
        # Availability probe only; skip the test when xlrd is missing.
        try:
            import xlrd
        except ImportError:
            raise nose.SkipTest('xlrd not installed, skipping')

        book = ExcelFile(os.path.join(self.dirpath, 'test.xls'))
        common = {'index_col': 0, 'parse_dates': True}

        first = book.parse('Sheet1', **common)
        expected = read_csv(self.csv1, **common)
        # Sheet2 is read with its second row (index 1) skipped.
        second = book.parse('Sheet2', skiprows=[1], **common)

        assert_frame_equal(first, expected)
        assert_frame_equal(second, expected)
Пример #40
0
    def test_xlsx_table(self):
        """Both sheets of ``test.xlsx`` must match the frame read from csv1."""
        # Availability probe only; skip the test when openpyxl is missing.
        try:
            import openpyxl
        except ImportError:
            raise nose.SkipTest('openpyxl not installed, skipping')

        workbook = ExcelFile(os.path.join(self.dirpath, 'test.xlsx'))
        read_opts = dict(index_col=0, parse_dates=True)

        sheet1 = workbook.parse('Sheet1', **read_opts)
        reference = read_csv(self.csv1, **read_opts)
        # Sheet2 is read with its second row (index 1) skipped.
        sheet2 = workbook.parse('Sheet2', skiprows=[1], **read_opts)

        assert_frame_equal(sheet1, reference)
        assert_frame_equal(sheet2, reference)
Пример #41
0
    def test_to_excel(self):
        """Write ``self.panel`` to xlsx and compare every item's sheet.

        Skipped when any of xlwt, xlrd or openpyxl cannot be imported.
        """
        # The imports only probe for the Excel libraries.
        try:
            import xlwt
            import xlrd
            import openpyxl
        except ImportError:
            raise nose.SkipTest

        import os  # local import: needed only for temp-file cleanup

        path = "__tmp__.xlsx"
        try:
            self.panel.to_excel(path)
            reader = ExcelFile(path)
            for item, df in self.panel.iteritems():
                recdf = reader.parse(str(item), index_col=0)
                assert_frame_equal(df, recdf)
        finally:
            # The original never removed the workbook; always clean up.
            if os.path.exists(path):
                os.remove(path)
Пример #42
0
    def test_excel_roundtrip_datetime(self):
        """Round-trip a frame whose index holds ``datetime.date`` objects.

        The result is compared against the original ``self.tsframe``.
        """
        _skip_if_no_xlrd()
        _skip_if_no_xlwt()

        # datetime.date, not sure what to test here exactly
        tmp_name = '__tmp_excel_roundtrip_datetime__.xls'
        dated = self.tsframe.copy()

        with ensure_clean(tmp_name) as path:
            dated.index = [stamp.date() for stamp in self.tsframe.index]
            dated.to_excel(path, 'test1')
            roundtripped = ExcelFile(path).parse('test1')
            tm.assert_frame_equal(self.tsframe, roundtripped)
Пример #43
0
    def test_to_excel(self):
        """Round-trip ``self.panel`` through xlsx, one sheet per item.

        Skipped when any of xlwt, xlrd or openpyxl cannot be imported.
        """
        # The imports only probe for the Excel libraries.
        try:
            import xlwt
            import xlrd
            import openpyxl
        except ImportError:
            raise nose.SkipTest

        import os  # local import: needed only for temp-file cleanup

        path = '__tmp__.xlsx'
        try:
            self.panel.to_excel(path)
            reader = ExcelFile(path)
            for item, df in self.panel.iteritems():
                recdf = reader.parse(str(item), index_col=0)
                assert_frame_equal(df, recdf)
        finally:
            # The original never removed the workbook; always clean up.
            if os.path.exists(path):
                os.remove(path)
Пример #44
0
    def test_excel_read_buffer(self):
        """ExcelFile must accept open binary file objects for xls and xlsx."""
        _skip_if_no_xlrd()
        _skip_if_no_openpyxl()

        # Each handle is opened in a ``with`` block so it is closed after
        # parsing; the original left both handles open.
        pth = os.path.join(self.dirpath, 'test.xls')
        with open(pth, 'rb') as f:
            xls = ExcelFile(f)
            # it works
            xls.parse('Sheet1', index_col=0, parse_dates=True)

        pth = os.path.join(self.dirpath, 'test.xlsx')
        with open(pth, 'rb') as f:
            xl = ExcelFile(f)
            xl.parse('Sheet1', index_col=0, parse_dates=True)
Пример #45
0
    def test_parse_cols_int(self):
        """``parse_cols=3`` must yield columns A, B and C for xls and xlsx."""
        _skip_if_no_openpyxl()
        _skip_if_no_xlrd()

        for tail in ("", "x"):
            workbook = ExcelFile(os.path.join(self.dirpath, "test.xls%s" % tail))

            sheet1 = workbook.parse(
                "Sheet1", index_col=0, parse_dates=True, parse_cols=3)
            reference = self.read_csv(self.csv1, index_col=0, parse_dates=True)
            reference = reference.reindex(columns=["A", "B", "C"])
            # Sheet2 is read with its second row (index 1) skipped.
            sheet2 = workbook.parse(
                "Sheet2", skiprows=[1], index_col=0, parse_dates=True,
                parse_cols=3)

            tm.assert_frame_equal(sheet1, reference)
            tm.assert_frame_equal(sheet2, reference)
Пример #46
0
    def test_parse_cols_str(self):
        """String ``parse_cols`` specs must select the expected columns.

        Checks a range ('A:D'), a list ('A,C,D') and a mix ('A,C:D') against
        both the xls and the xlsx test workbook.
        """
        _skip_if_no_openpyxl()
        _skip_if_no_xlrd()

        # (parse_cols spec, columns expected after the index column)
        cases = [
            ('A:D', ['A', 'B', 'C']),
            ('A,C,D', ['B', 'C']),
            ('A,C:D', ['B', 'C']),
        ]

        for tail in ['', 'x']:
            workbook = ExcelFile(os.path.join(self.dirpath, 'test.xls%s' % tail))

            for spec, wanted_cols in cases:
                sheet1 = workbook.parse('Sheet1',
                                        index_col=0,
                                        parse_dates=True,
                                        parse_cols=spec)
                reference = read_csv(self.csv1, index_col=0, parse_dates=True)
                reference = reference.reindex(columns=wanted_cols)
                sheet2 = workbook.parse('Sheet2',
                                        skiprows=[1],
                                        index_col=0,
                                        parse_dates=True,
                                        parse_cols=spec)
                # TODO add index to xls, read xls ignores index name ?
                tm.assert_frame_equal(sheet1, reference, check_names=False)
                tm.assert_frame_equal(sheet2, reference, check_names=False)
    def _check_extension_int64(self, ext):
        """Round-trip an ``np.int64`` frame through a temporary Excel file.

        ext : file extension (without the dot) of the temporary file
        """
        tmp_name = '__tmp_to_excel_from_excel_int64__.' + ext

        with ensure_clean(tmp_name) as path:
            self.frame['A'][:5] = nan

            # Smoke-test the basic writer options first.
            self.frame.to_excel(path, 'test1')
            self.frame.to_excel(path, 'test1', cols=['A', 'B'])
            self.frame.to_excel(path, 'test1', header=False)
            self.frame.to_excel(path, 'test1', index=False)

            # Test np.int64, values read come back as float
            values = np.random.randint(-10, 10, size=(10, 2))
            ints = DataFrame(values, dtype=np.int64)
            ints.to_excel(path, 'test1')

            parsed = ExcelFile(path).parse('test1')
            roundtripped = parsed.astype(np.int64)
            tm.assert_frame_equal(ints, roundtripped, check_dtype=False)
    def _check_extension_bool(self, ext):
        """Round-trip a random boolean frame through a temporary Excel file.

        ext : file extension (without the dot) of the temporary file
        """
        tmp_name = '__tmp_to_excel_from_excel_bool__.' + ext

        with ensure_clean(tmp_name) as path:
            self.frame['A'][:5] = nan

            # Smoke-test the basic writer options first.
            self.frame.to_excel(path, 'test1')
            self.frame.to_excel(path, 'test1', cols=['A', 'B'])
            self.frame.to_excel(path, 'test1', header=False)
            self.frame.to_excel(path, 'test1', index=False)

            # Test reading/writing np.bool8, roundtrip only works for xlsx
            flags = DataFrame(np.random.randn(10, 2)) >= 0
            flags.to_excel(path, 'test1')

            parsed = ExcelFile(path).parse('test1')
            roundtripped = parsed.astype(np.bool8)
            tm.assert_frame_equal(flags, roundtripped)
Пример #49
0
    def test_to_excel_float_format(self):
        """``float_format='%.2f'`` must round the values written to Excel.

        Runs for both ``xls`` and ``xlsx``.
        """
        _skip_if_no_excelsuite()
        for ext in ['xls', 'xlsx']:
            filename = '__tmp_to_excel_float_format__.' + ext
            df = DataFrame([[0.123456, 0.234567, 0.567567],
                            [12.32112, 123123.2, 321321.2]],
                           index=['A', 'B'],
                           columns=['X', 'Y', 'Z'])
            try:
                df.to_excel(filename, 'test1', float_format='%.2f')

                reader = ExcelFile(filename)
                rs = reader.parse('test1', index_col=None)
                xp = DataFrame([[0.12, 0.23, 0.57], [12.32, 123123.20, 321321.20]],
                               index=['A', 'B'],
                               columns=['X', 'Y', 'Z'])
                tm.assert_frame_equal(rs, xp)
            finally:
                # Remove the temp file even when the comparison fails.
                if os.path.exists(filename):
                    os.remove(filename)
    def test_excel_roundtrip_indexname(self):
        """The index name ``'foo'`` must survive an xls round trip."""
        _skip_if_no_xlrd()
        _skip_if_no_xlwt()

        tmp_name = '%s.xls' % tm.rands(10)

        named = DataFrame(np.random.randn(10, 4))
        named.index.name = 'foo'

        with ensure_clean(tmp_name) as path:
            named.to_excel(path)

            workbook = ExcelFile(path)
            first_sheet = workbook.sheet_names[0]
            result = workbook.parse(first_sheet, index_col=0)

            tm.assert_frame_equal(result, named)
            self.assertEqual(result.index.name, 'foo')
Пример #51
0
    def test_to_excel(self):
        """Round-trip ``self.panel`` through xls and xlsx, one sheet per item.

        Skipped when any of the imports below fails.
        """
        try:
            import os
            import xlwt
            import xlrd
            import openpyxl
            from pandas.io.parsers import ExcelFile
        except ImportError:
            raise nose.SkipTest

        for ext in ['xls', 'xlsx']:
            path = '__tmp__.' + ext
            try:
                self.panel.to_excel(path)
                reader = ExcelFile(path)
                for item, df in self.panel.iteritems():
                    recdf = reader.parse(str(item), index_col=0)
                    assert_frame_equal(df, recdf)
            finally:
                # Remove the temp file even when a comparison fails.
                if os.path.exists(path):
                    os.remove(path)
    def _check_extension_colaliases(self, ext):
        """Writing with ``header=<aliases>`` must rename the columns on disk.

        ext : file extension (without the dot) of the temporary file
        """
        tmp_name = '__tmp_to_excel_from_excel_aliases__.' + ext

        with ensure_clean(tmp_name) as path:
            self.frame['A'][:5] = nan

            # Smoke-test the basic writer options first.
            self.frame.to_excel(path, 'test1')
            self.frame.to_excel(path, 'test1', cols=['A', 'B'])
            self.frame.to_excel(path, 'test1', header=False)
            self.frame.to_excel(path, 'test1', index=False)

            # column aliases
            aliases = Index(['AA', 'X', 'Y', 'Z'])
            self.frame2.to_excel(path, 'test1', header=aliases)
            parsed = ExcelFile(path).parse('test1', index_col=0)

            # Expected: the same data with the alias names as columns.
            wanted = self.frame2.copy()
            wanted.columns = aliases
            tm.assert_frame_equal(wanted, parsed)
    def test_specify_kind_xlsx(self):
        """``kind='xlsx'`` must reject xls input and accept an xlsx buffer."""
        _skip_if_no_openpyxl()
        xlsx_file = os.path.join(self.dirpath, 'test.xlsx')
        xls_file = os.path.join(self.dirpath, 'test.xls')

        self.assertRaises(Exception, ExcelFile, xls_file, kind='xlsx')

        # Buffers are opened in ``with`` blocks so the handles are closed;
        # the original left both of them open.
        with open(xlsx_file, 'rb') as buf:
            ExcelFile(buf, kind='xlsx')

        with open(xls_file, 'rb') as buf:
            self.assertRaises(Exception, ExcelFile, buf, kind='xlsx')
Пример #54
0
    def _check_excel_multiindex(self, ext):
        """Round-trip ``self.frame`` with a named two-level integer index.

        The frame's index is replaced for the duration of the check and
        restored afterwards.

        ext : file extension (without the dot) of the temporary file
        """
        path = '__tmp_to_excel_multiindex__' + ext + '__.' + ext

        frame = self.frame
        old_index = frame.index
        arrays = np.arange(len(old_index) * 2).reshape(2, -1)
        new_index = MultiIndex.from_arrays(arrays, names=['first', 'second'])
        frame.index = new_index
        try:
            frame.to_excel(path, 'test1', header=False)
            frame.to_excel(path, 'test1', cols=['A', 'B'])

            # round trip
            frame.to_excel(path, 'test1')
            reader = ExcelFile(path)
            df = reader.parse('test1', index_col=[0, 1], parse_dates=False)
            tm.assert_frame_equal(frame, df)
            self.assertEqual(frame.index.names, df.index.names)
        finally:
            # Undo the fixture mutation and drop the temp file even when an
            # assertion above fails.
            self.frame.index = old_index  # needed if setUP becomes a classmethod
            if os.path.exists(path):
                os.remove(path)
Пример #55
0
    def test_excel_roundtrip_indexname(self):
        """The index name ``'foo'`` must survive an xls round trip."""
        _skip_if_no_xlrd()
        _skip_if_no_xlwt()

        path = '%s.xls' % tm.rands(10)

        df = DataFrame(np.random.randn(10, 4))
        df.index.name = 'foo'

        try:
            df.to_excel(path)

            xf = ExcelFile(path)
            result = xf.parse(xf.sheet_names[0], index_col=0)

            tm.assert_frame_equal(result, df)
            self.assertEqual(result.index.name, 'foo')
        finally:
            # Best-effort cleanup; it now also runs when an assertion fails.
            try:
                os.remove(path)
            except os.error:
                pass
Пример #56
0
    def _check_excel_multiindex_dates(self, ext):
        """Round-trip ``self.tsframe`` with a two-level index through Excel.

        The frame's index is temporarily replaced by a MultiIndex made of the
        original index plus an integer range.  The frame is read back once
        with explicit index labels and once letting the reader infer the
        index.

        ext : file extension (without the dot) of the temporary file
        """
        path = '__tmp_to_excel_multiindex_dates__' + ext + '__.' + ext

        # try multiindex with dates
        tsframe = self.tsframe
        old_index = tsframe.index
        new_index = [old_index, np.arange(len(old_index))]
        tsframe.index = MultiIndex.from_arrays(new_index)

        try:
            tsframe.to_excel(path, 'test1', index_label=['time', 'foo'])
            reader = ExcelFile(path)
            recons = reader.parse('test1', index_col=[0, 1])

            tm.assert_frame_equal(tsframe, recons, check_names=False)
            self.assertEqual(recons.index.names, ['time', 'foo'])

            # infer index
            tsframe.to_excel(path, 'test1')
            reader = ExcelFile(path)
            recons = reader.parse('test1')
            tm.assert_frame_equal(tsframe, recons)
        finally:
            # Restore the shared fixture and remove the temp file whether or
            # not the checks passed.
            self.tsframe.index = old_index  # needed if setUP becomes classmethod
            if os.path.exists(path):
                os.remove(path)
Пример #57
0
    def test_parse_cols_int(self):
        """``parse_cols=3`` must yield columns A, B and C for xls and xlsx."""
        _skip_if_no_openpyxl()
        _skip_if_no_xlrd()

        read_opts = dict(index_col=0, parse_dates=True)

        for tail in ('', 'x'):
            workbook = ExcelFile(os.path.join(self.dirpath, 'test.xls%s' % tail))

            sheet1 = workbook.parse('Sheet1', parse_cols=3, **read_opts)
            reference = self.read_csv(self.csv1, **read_opts)
            reference = reference.reindex(columns=['A', 'B', 'C'])
            # Sheet2 is read with its second row (index 1) skipped.
            sheet2 = workbook.parse('Sheet2', skiprows=[1], parse_cols=3,
                                    **read_opts)

            tm.assert_frame_equal(sheet1, reference)
            tm.assert_frame_equal(sheet2, reference)
Пример #58
0
    def saving_simulation(self, file_path=None):
        """
        CANNOT BE COMPLETED FOR NOW BECAUSE OF A BUG OF PANDAS

        Write every cohort frame that carries a ``name`` attribute to its own
        sheet of the workbook at ``file_path``.

        file_path : full path of the workbook; required

        Raises Exception when ``file_path`` is None.
        """

        if file_path is None:
            raise Exception(
                'A complete path to the file should be provided. DO not hesitate to use os.path.join'
            )

        # NOTE(review): ExcelFile is the class used for *reading* workbooks
        # elsewhere; writing and ``save()`` presumably need ExcelWriter.
        # This may be the "bug" mentioned above -- confirm before changing.
        writer = ExcelFile(file_path)
        print(writer)
        cohorts_list_base = [
            self.cohorts, self.cohorts_alt, self.percapita_pv,
            self.percapita_pv_alt, self.aggregate_pv, self.aggregate_pv_alt
        ]
        cohorts_list = []

        # Keep only the frames that expose a ``name`` attribute.
        for df in cohorts_list_base:
            try:
                df.name
            except AttributeError:
                print('no such cohort in this simulation')
            else:
                cohorts_list.append(df)
                print('good to go')

        print(len(cohorts_list))
        df_dict = dict(
            (dataframe.name, dataframe) for dataframe in cohorts_list)
        print(df_dict)

        for name, attribute in df_dict.items():
            print('new dataframe')
            attribute.to_excel(writer, sheet_name=name)
        writer.save()