def _check_excel_multiindex_dates(self, ext):
    """Round-trip ``self.tsframe`` with a (date, int) MultiIndex through Excel.

    ``ext`` is the file extension used for the scratch workbook.  The
    fixture's index is temporarily replaced; both the original index and
    the scratch file are cleaned up even if an assertion fails.
    """
    path = '__tmp_to_excel_multiindex_dates__' + ext + '__.' + ext

    # try multiindex with dates
    tsframe = self.tsframe  # same object as the fixture, mutated below
    old_index = tsframe.index
    try:
        new_index = [old_index, np.arange(len(old_index))]
        tsframe.index = MultiIndex.from_arrays(new_index)

        tsframe.to_excel(path, 'test1', index_label=['time', 'foo'])
        reader = ExcelFile(path)
        recons = reader.parse('test1', index_col=[0, 1])
        tm.assert_frame_equal(tsframe, recons, check_names=False)
        self.assertEqual(recons.index.names, ['time', 'foo'])

        # infer index
        tsframe.to_excel(path, 'test1')
        reader = ExcelFile(path)
        recons = reader.parse('test1')
        tm.assert_frame_equal(tsframe, recons)
    finally:
        # needed if setUP becomes classmethod; must also run on failure so
        # later tests do not see the MultiIndex
        self.tsframe.index = old_index
        if os.path.exists(path):
            os.remove(path)
def test_excel_stop_iterator(self):
    """Parse ``Sheet1`` of ``test2.xls`` and compare it with a one-row frame.

    ``_skip_if_no_xlrd()`` is invoked first (presumably skips the test
    when xlrd is missing).
    """
    _skip_if_no_xlrd()

    workbook_path = os.path.join(self.dirpath, 'test2.xls')
    got = ExcelFile(workbook_path).parse('Sheet1')
    want = DataFrame([['aaaa', 'bbbbb']], columns=['Test', 'Test1'])
    tm.assert_frame_equal(got, want)
def _check_extension_sheets(self, ext):
    """Write two frames to separate sheets of one workbook and read them back.

    ``ext`` is the extension of the scratch workbook.  The scratch file is
    removed even when an assertion fails.
    """
    path = '__tmp_to_excel_from_excel_sheets__.' + ext
    try:
        self.frame['A'][:5] = nan

        # Exercise the basic writer options first
        self.frame.to_excel(path, 'test1')
        self.frame.to_excel(path, 'test1', cols=['A', 'B'])
        self.frame.to_excel(path, 'test1', header=False)
        self.frame.to_excel(path, 'test1', index=False)

        # Test writing to separate sheets
        writer = ExcelWriter(path)
        self.frame.to_excel(writer, 'test1')
        self.tsframe.to_excel(writer, 'test2')
        writer.save()

        reader = ExcelFile(path)
        recons = reader.parse('test1', index_col=0)
        tm.assert_frame_equal(self.frame, recons)
        recons = reader.parse('test2', index_col=0)
        tm.assert_frame_equal(self.tsframe, recons)

        np.testing.assert_equal(2, len(reader.sheet_names))
        np.testing.assert_equal('test1', reader.sheet_names[0])
        np.testing.assert_equal('test2', reader.sheet_names[1])
    finally:
        # Do not leave the scratch workbook behind on failure
        if os.path.exists(path):
            os.remove(path)
def test_excel_stop_iterator(self):
    """Check that ``Sheet1`` of ``test2.xls`` parses to the expected row.

    Calls ``_skip_if_no_xlrd()`` before doing any work.
    """
    _skip_if_no_xlrd()

    source = os.path.join(self.dirpath, "test2.xls")
    workbook = ExcelFile(source)
    actual = workbook.parse("Sheet1")

    reference = DataFrame([["aaaa", "bbbbb"]], columns=["Test", "Test1"])
    tm.assert_frame_equal(actual, reference)
def _check_extension_sheets(self, ext):
    """Round-trip ``self.frame`` and ``self.tsframe`` via two sheets.

    ``ext`` selects the scratch file's extension.  Cleanup runs in a
    ``finally`` clause so a failing assertion does not leak the file.
    """
    path = '__tmp_to_excel_from_excel_sheets__.' + ext
    try:
        self.frame['A'][:5] = nan

        self.frame.to_excel(path, 'test1')
        self.frame.to_excel(path, 'test1', cols=['A', 'B'])
        self.frame.to_excel(path, 'test1', header=False)
        self.frame.to_excel(path, 'test1', index=False)

        # Test writing to separate sheets
        writer = ExcelWriter(path)
        self.frame.to_excel(writer, 'test1')
        self.tsframe.to_excel(writer, 'test2')
        writer.save()

        reader = ExcelFile(path)
        recons = reader.parse('test1', index_col=0)
        tm.assert_frame_equal(self.frame, recons)
        recons = reader.parse('test2', index_col=0)
        tm.assert_frame_equal(self.tsframe, recons)

        np.testing.assert_equal(2, len(reader.sheet_names))
        np.testing.assert_equal('test1', reader.sheet_names[0])
        np.testing.assert_equal('test2', reader.sheet_names[1])
    finally:
        # Remove the scratch workbook whether or not the checks passed
        if os.path.exists(path):
            os.remove(path)
def test_excel_cell_error_na(self):
    """Parse ``Sheet1`` of ``test3.xls`` and expect a single NaN cell.

    Calls ``_skip_if_no_xlrd()`` before doing any work.
    """
    _skip_if_no_xlrd()

    source = os.path.join(self.dirpath, "test3.xls")
    actual = ExcelFile(source).parse("Sheet1")

    reference = DataFrame([[np.nan]], columns=["Test"])
    tm.assert_frame_equal(actual, reference)
def test_excel_cell_error_na(self):
    """Check that ``test3.xls`` / ``Sheet1`` is read back as one NaN value.

    ``_skip_if_no_xlrd()`` is invoked first.
    """
    _skip_if_no_xlrd()

    workbook_path = os.path.join(self.dirpath, 'test3.xls')
    workbook = ExcelFile(workbook_path)
    got = workbook.parse('Sheet1')
    want = DataFrame([[np.nan]], columns=['Test'])
    tm.assert_frame_equal(got, want)
def test_to_excel_unicode_filename(self):
    """Write and re-read a workbook whose file name contains a non-ASCII char.

    Runs for both ``xls`` and ``xlsx``.  Raises ``nose.SkipTest`` when the
    probe ``open`` fails with ``UnicodeEncodeError``.  The file is removed
    even when the comparison fails.
    """
    _skip_if_no_excelsuite()

    for ext in ["xls", "xlsx"]:
        filename = u"\u0192u." + ext

        # Probe whether the file system accepts the unicode name at all
        try:
            f = open(filename, "wb")
        except UnicodeEncodeError:
            raise nose.SkipTest("no unicode file names on this system")
        else:
            f.close()

        try:
            df = DataFrame(
                [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]],
                index=["A", "B"],
                columns=["X", "Y", "Z"],
            )
            df.to_excel(filename, "test1", float_format="%.2f")

            reader = ExcelFile(filename)
            rs = reader.parse("test1", index_col=None)
            xp = DataFrame(
                [[0.12, 0.23, 0.57], [12.32, 123123.20, 321321.20]],
                index=["A", "B"],
                columns=["X", "Y", "Z"],
            )
            tm.assert_frame_equal(rs, xp)
        finally:
            # The probe above already created the file, so always clean up
            if os.path.exists(filename):
                os.remove(filename)
def test_to_excel_unicode_filename(self):
    """Round-trip a small float frame through a non-ASCII file name.

    Runs once per extension (``xls``, ``xlsx``).  Raises ``nose.SkipTest``
    when opening the probe file fails with ``UnicodeEncodeError``.
    """
    _skip_if_no_excelsuite()

    labels = dict(index=['A', 'B'], columns=['X', 'Y', 'Z'])
    for ext in ['xls', 'xlsx']:
        filename = '\u0192u.' + ext

        # Probe: can this system create the file name at all?
        try:
            f = open(filename, 'wb')
        except UnicodeEncodeError:
            raise nose.SkipTest('no unicode file names on this system')
        else:
            f.close()

        df = DataFrame([[0.123456, 0.234567, 0.567567],
                        [12.32112, 123123.2, 321321.2]], **labels)

        with ensure_clean(filename) as filename:
            df.to_excel(filename, 'test1', float_format='%.2f')

            rs = ExcelFile(filename).parse('test1', index_col=None)
            xp = DataFrame([[0.12, 0.23, 0.57],
                            [12.32, 123123.20, 321321.20]], **labels)
            tm.assert_frame_equal(rs, xp)
def _check_extension_sheets(self, ext):
    """Verify that two sheets written through one ``ExcelWriter`` round-trip.

    ``ext`` is the scratch workbook's extension.  The scratch file is
    deleted in a ``finally`` clause so failures do not leave it on disk.
    """
    path = "__tmp_to_excel_from_excel_sheets__." + ext
    try:
        self.frame["A"][:5] = nan

        self.frame.to_excel(path, "test1")
        self.frame.to_excel(path, "test1", cols=["A", "B"])
        self.frame.to_excel(path, "test1", header=False)
        self.frame.to_excel(path, "test1", index=False)

        # Test writing to separate sheets
        writer = ExcelWriter(path)
        self.frame.to_excel(writer, "test1")
        self.tsframe.to_excel(writer, "test2")
        writer.save()

        reader = ExcelFile(path)
        recons = reader.parse("test1", index_col=0)
        tm.assert_frame_equal(self.frame, recons)
        recons = reader.parse("test2", index_col=0)
        tm.assert_frame_equal(self.tsframe, recons)

        np.testing.assert_equal(2, len(reader.sheet_names))
        np.testing.assert_equal("test1", reader.sheet_names[0])
        np.testing.assert_equal("test2", reader.sheet_names[1])
    finally:
        # Always remove the scratch workbook
        if os.path.exists(path):
            os.remove(path)
def test_excel_stop_iterator(self):
    """Parse ``test2.xls`` / ``Sheet1`` and compare with the expected row.

    ``_skip_if_no_xlrd()`` is invoked first.
    """
    _skip_if_no_xlrd()

    workbook = ExcelFile(os.path.join(self.dirpath, 'test2.xls'))
    got = workbook.parse('Sheet1')

    row = ['aaaa', 'bbbbb']
    want = DataFrame([row], columns=['Test', 'Test1'])
    assert_frame_equal(got, want)
def test_excel_cell_error_na(self):
    """Parse ``test3.xls`` / ``Sheet1`` and expect a frame holding one NaN.

    ``_skip_if_no_xlrd()`` is invoked first.
    """
    _skip_if_no_xlrd()

    workbook = ExcelFile(os.path.join(self.dirpath, 'test3.xls'))
    got = workbook.parse('Sheet1')
    want = DataFrame([[np.nan]], columns=['Test'])
    assert_frame_equal(got, want)
def getExcelChunck(file, ws, drange, rid=-1, cid=-1):
    """Return the rectangular part of sheet ``ws`` described by ``drange``.

    ``drange`` must contain two cell references back to back (e.g.
    ``B4H4``); it is matched against ``([A-Z]+)([0-9]+)([A-Z]+)([0-9]+)``.
    Column letters go through ``sord`` (presumably letters -> column
    position; defined elsewhere) and row numbers are shifted by -2.

    When ``rid >= 0`` the row ``int(rid) - 2`` supplies the column labels;
    when ``cid >= 0`` the column ``sord(cid)`` supplies the index.  Labels
    are stringified, stripped, and have every ``'.0'`` removed.
    """
    def _tidy(label):
        return str(label).strip().replace('.0', '')

    sheet = ExcelFile(file).parse(ws)

    # get the range from expression
    # B4H4
    match = re.search('([A-Z]+)([0-9]+)([A-Z]+)([0-9]+)', drange)
    start_col, start_row, stop_col, stop_row = match.groups()
    c1 = sord(start_col)
    c2 = sord(stop_col) + 1
    r1 = int(start_row) - 2
    r2 = int(stop_row) - 2

    chunk = sheet.ix[r1:r2, c1:c2]

    if rid >= 0:
        header_row = int(rid) - 2
        chunk.columns = sheet.ix[header_row, c1:c2]
        chunk.columns = chunk.columns.map(_tidy)

    if cid >= 0:
        label_col = sord(cid)
        chunk.index = sheet.ix[r1:r2, label_col]
        chunk.index = chunk.index.map(_tidy)

    return chunk
def test_to_excel_unicode_filename(self):
    """Round-trip a float frame through a workbook with a non-ASCII name.

    Runs for ``xls`` and ``xlsx``.  Raises ``nose.SkipTest`` if the probe
    ``open`` raises ``UnicodeEncodeError``.  The file is deleted in a
    ``finally`` clause so a failed comparison does not leak it.
    """
    _skip_if_no_excelsuite()

    for ext in ['xls', 'xlsx']:
        filename = u'\u0192u.' + ext

        # Probe whether the unicode file name can be created
        try:
            f = open(filename, 'wb')
        except UnicodeEncodeError:
            raise nose.SkipTest('no unicode file names on this system')
        else:
            f.close()

        try:
            df = DataFrame([[0.123456, 0.234567, 0.567567],
                            [12.32112, 123123.2, 321321.2]],
                           index=['A', 'B'], columns=['X', 'Y', 'Z'])
            df.to_excel(filename, 'test1', float_format='%.2f')

            reader = ExcelFile(filename)
            rs = reader.parse('test1', index_col=None)
            xp = DataFrame([[0.12, 0.23, 0.57],
                            [12.32, 123123.20, 321321.20]],
                           index=['A', 'B'], columns=['X', 'Y', 'Z'])
            tm.assert_frame_equal(rs, xp)
        finally:
            # The probe already created the file; remove it in all cases
            if os.path.exists(filename):
                os.remove(filename)
def _check_extension_mixed(self, ext):
    """Round-trip ``self.mixed_frame`` through a scratch workbook.

    ``ext`` is the extension appended to the scratch file name.
    """
    scratch = '__tmp_to_excel_from_excel_mixed__.' + ext

    with ensure_clean(scratch) as path:
        self.mixed_frame.to_excel(path, 'test1')
        recons = ExcelFile(path).parse('test1', index_col=0)
        tm.assert_frame_equal(self.mixed_frame, recons)
def test_excel_table(self):
    """Compare both sheets of ``test.xls`` with the frame read from ``self.csv1``.

    ``Sheet2`` is parsed with ``skiprows=[1]``.
    """
    workbook = ExcelFile(os.path.join(self.dirpath, 'test.xls'))

    sheet1 = workbook.parse('Sheet1')
    reference = read_csv(self.csv1)
    sheet2 = workbook.parse('Sheet2', skiprows=[1])

    assert_frame_equal(sheet1, reference)
    assert_frame_equal(sheet2, reference)
def _check_extension_mixed(self, ext):
    """Round-trip ``self.mixed_frame`` through a scratch workbook.

    ``ext`` is the extension of the scratch file, which is removed even
    when the comparison fails.
    """
    path = '__tmp_to_excel_from_excel_mixed__.' + ext
    try:
        self.mixed_frame.to_excel(path, 'test1')
        reader = ExcelFile(path)
        recons = reader.parse('test1', index_col=0, has_index_names=True)
        tm.assert_frame_equal(self.mixed_frame, recons)
    finally:
        # Clean up regardless of the test outcome
        if os.path.exists(path):
            os.remove(path)
def _check_extension_mixed(self, ext):
    """Write ``self.mixed_frame`` to Excel and check it reads back equal.

    ``ext`` is the scratch file's extension.  Cleanup happens in a
    ``finally`` clause so a failing assertion does not leak the file.
    """
    path = '__tmp_to_excel_from_excel_mixed__.' + ext
    try:
        self.mixed_frame.to_excel(path, 'test1')
        reader = ExcelFile(path)
        recons = reader.parse('test1', index_col=0, has_index_names=True)
        tm.assert_frame_equal(self.mixed_frame, recons)
    finally:
        # Always remove the scratch workbook
        if os.path.exists(path):
            os.remove(path)
def _check_extension_mixed(self, ext):
    """Round-trip ``self.mixed_frame`` via a scratch workbook with extension ``ext``.

    The scratch file is deleted whether or not the comparison succeeds.
    """
    path = "__tmp_to_excel_from_excel_mixed__." + ext
    try:
        self.mixed_frame.to_excel(path, "test1")
        reader = ExcelFile(path)
        recons = reader.parse("test1", index_col=0)
        tm.assert_frame_equal(self.mixed_frame, recons)
    finally:
        # Do not leave the scratch file behind on failure
        if os.path.exists(path):
            os.remove(path)
def test_excel_cell_error_na(self):
    """Parse ``test3.xls`` / ``Sheet1`` and expect a single NaN cell.

    Raises ``nose.SkipTest`` when ``xlrd`` cannot be imported.
    """
    try:
        import xlrd  # imported only to probe availability
    except ImportError:
        raise nose.SkipTest('xlrd not installed, skipping')

    workbook_path = os.path.join(self.dirpath, 'test3.xls')
    got = ExcelFile(workbook_path).parse('Sheet1')
    want = DataFrame([[np.nan]], columns=['Test'])
    assert_frame_equal(got, want)
def _check_extension_tsframe(self, ext):
    """Round-trip the first five rows of ``tm.makeTimeDataFrame()`` via Excel.

    ``ext`` is the extension appended to the scratch file name.
    """
    scratch = '__tmp_to_excel_from_excel_tsframe__.' + ext
    df = tm.makeTimeDataFrame()[:5]

    with ensure_clean(scratch) as path:
        df.to_excel(path, 'test1')
        recons = ExcelFile(path).parse('test1')
        tm.assert_frame_equal(df, recons)
def test_xlsx_table(self):
    """Compare both sheets of ``test.xlsx`` with the frame read from ``self.csv1``.

    All reads use ``index_col=0, parse_dates=True``; ``Sheet2`` also uses
    ``skiprows=[1]``.  ``_skip_if_no_openpyxl()`` is invoked first.
    """
    _skip_if_no_openpyxl()

    read_opts = dict(index_col=0, parse_dates=True)
    workbook = ExcelFile(os.path.join(self.dirpath, 'test.xlsx'))

    sheet1 = workbook.parse('Sheet1', **read_opts)
    reference = read_csv(self.csv1, **read_opts)
    sheet2 = workbook.parse('Sheet2', skiprows=[1], **read_opts)

    assert_frame_equal(sheet1, reference)
    assert_frame_equal(sheet2, reference)
def test_excel_stop_iterator(self):
    """Parse ``test2.xls`` / ``Sheet1`` and compare with the expected row.

    Raises ``nose.SkipTest`` when ``xlrd`` cannot be imported.
    """
    try:
        import xlrd  # imported only to probe availability
    except ImportError:
        raise nose.SkipTest('xlrd not installed, skipping')

    workbook_path = os.path.join(self.dirpath, 'test2.xls')
    got = ExcelFile(workbook_path).parse('Sheet1')
    want = DataFrame([['aaaa', 'bbbbb']], columns=['Test', 'Test1'])
    assert_frame_equal(got, want)
def test_excel_stop_iterator(self):
    """Check that ``Sheet1`` of ``test2.xls`` yields the single expected row.

    Raises ``nose.SkipTest`` when ``xlrd`` cannot be imported.
    """
    try:
        import xlrd  # availability probe only
    except ImportError:
        raise nose.SkipTest('xlrd not installed, skipping')

    workbook = ExcelFile(os.path.join(self.dirpath, 'test2.xls'))
    actual = workbook.parse('Sheet1')

    reference = DataFrame([['aaaa', 'bbbbb']], columns=['Test', 'Test1'])
    assert_frame_equal(actual, reference)
def test_excel_stop_iterator(self):
    """Read ``test2.xls`` / ``Sheet1`` and compare it with a one-row frame.

    Raises ``nose.SkipTest`` when ``xlrd`` cannot be imported.
    """
    try:
        import xlrd  # availability probe only
    except ImportError:
        raise nose.SkipTest("xlrd not installed, skipping")

    source = os.path.join(self.dirpath, "test2.xls")
    actual = ExcelFile(source).parse("Sheet1")

    header = ["Test", "Test1"]
    reference = DataFrame([["aaaa", "bbbbb"]], columns=header)
    assert_frame_equal(actual, reference)
def _check_extension(self, ext):
    """Round-trip ``self.frame`` through Excel with several writer options.

    Covers the default write, ``index=False`` and ``na_rep='NA'``.  ``ext``
    is the scratch file's extension; the file is removed even when an
    assertion fails.
    """
    path = '__tmp_to_excel_from_excel__.' + ext
    try:
        self.frame['A'][:5] = nan

        # Exercise the basic writer options first
        self.frame.to_excel(path, 'test1')
        self.frame.to_excel(path, 'test1', cols=['A', 'B'])
        self.frame.to_excel(path, 'test1', header=False)
        self.frame.to_excel(path, 'test1', index=False)

        # test roundtrip
        self.frame.to_excel(path, 'test1')
        reader = ExcelFile(path)
        recons = reader.parse('test1', index_col=0)
        tm.assert_frame_equal(self.frame, recons)

        # Without a written index the original index is re-attached by hand
        self.frame.to_excel(path, 'test1', index=False)
        reader = ExcelFile(path)
        recons = reader.parse('test1', index_col=None)
        recons.index = self.frame.index
        tm.assert_frame_equal(self.frame, recons)

        self.frame.to_excel(path, 'test1', na_rep='NA')
        reader = ExcelFile(path)
        recons = reader.parse('test1', index_col=0, na_values=['NA'])
        tm.assert_frame_equal(self.frame, recons)
    finally:
        # Do not leave the scratch workbook behind on failure
        if os.path.exists(path):
            os.remove(path)
def _check_extension_tsframe(self, ext):
    """Round-trip the first five rows of ``tm.makeTimeDataFrame()`` via Excel.

    ``ext`` is the scratch file's extension; the file is removed even when
    the comparison fails.
    """
    path = "__tmp_to_excel_from_excel_tsframe__." + ext
    df = tm.makeTimeDataFrame()[:5]
    try:
        df.to_excel(path, "test1")
        reader = ExcelFile(path)
        recons = reader.parse("test1")
        tm.assert_frame_equal(df, recons)
    finally:
        # Always remove the scratch workbook
        if os.path.exists(path):
            os.remove(path)
def test_excel_roundtrip_bool(self):
    """Round-trip a random boolean frame through an ``.xlsx`` workbook.

    ``_skip_if_no_openpyxl()`` is invoked first.  The scratch file is
    removed even when the comparison fails.
    """
    _skip_if_no_openpyxl()

    # Test roundtrip np.bool8, does not seem to work for xls
    path = '__tmp_excel_roundtrip_bool__.xlsx'
    frame = (DataFrame(np.random.randn(10, 2)) >= 0)
    try:
        frame.to_excel(path, 'test1')
        reader = ExcelFile(path)
        recons = reader.parse('test1')
        tm.assert_frame_equal(frame, recons)
    finally:
        # Clean up regardless of the test outcome
        if os.path.exists(path):
            os.remove(path)
def test_excel_roundtrip_bool(self):
    """Write a random boolean frame to ``.xlsx`` and check it reads back equal.

    ``_skip_if_no_openpyxl()`` is invoked first.  Cleanup runs in a
    ``finally`` clause so a failing assertion does not leak the file.
    """
    _skip_if_no_openpyxl()

    # Test roundtrip np.bool8, does not seem to work for xls
    path = "__tmp_excel_roundtrip_bool__.xlsx"
    frame = DataFrame(np.random.randn(10, 2)) >= 0
    try:
        frame.to_excel(path, "test1")
        reader = ExcelFile(path)
        recons = reader.parse("test1")
        tm.assert_frame_equal(frame, recons)
    finally:
        # Always remove the scratch workbook
        if os.path.exists(path):
            os.remove(path)
def test_to_excel_periodindex(self):
    """Round-trip a monthly period-resampled ``self.tsframe`` through Excel.

    Runs for ``xls`` and ``xlsx``.  The frame read back is converted with
    ``to_period('M')`` before comparison.  Each scratch file is removed
    even when the comparison fails.
    """
    _skip_if_no_excelsuite()

    for ext in ['xls', 'xlsx']:
        path = '__tmp_to_excel_periodindex__.' + ext
        try:
            frame = self.tsframe
            xp = frame.resample('M', kind='period')
            xp.to_excel(path, 'sht1')

            reader = ExcelFile(path)
            rs = reader.parse('sht1', index_col=0, parse_dates=True)
            tm.assert_frame_equal(xp, rs.to_period('M'))
        finally:
            # Remove this extension's scratch file before moving on
            if os.path.exists(path):
                os.remove(path)
def test_excel_roundtrip_datetime(self):
    """Write ``self.tsframe`` with a ``datetime.date`` index and read it back.

    The frame read back is compared with the original ``self.tsframe``.
    The scratch file is removed even when the comparison fails.
    """
    _skip_if_no_xlrd()
    _skip_if_no_xlwt()

    # datetime.date, not sure what to test here exactly
    path = '__tmp_excel_roundtrip_datetime__.xls'
    tsf = self.tsframe.copy()
    tsf.index = [x.date() for x in self.tsframe.index]
    try:
        tsf.to_excel(path, 'test1')
        reader = ExcelFile(path)
        recons = reader.parse('test1')
        tm.assert_frame_equal(self.tsframe, recons)
    finally:
        # Clean up regardless of the test outcome
        if os.path.exists(path):
            os.remove(path)
def test_excel_roundtrip_datetime(self):
    """Round-trip a copy of ``self.tsframe`` indexed by ``datetime.date`` values.

    The result is compared with the original ``self.tsframe``.  Cleanup
    runs in a ``finally`` clause so failures do not leak the scratch file.
    """
    _skip_if_no_xlrd()
    _skip_if_no_xlwt()

    # datetime.date, not sure what to test here exactly
    path = "__tmp_excel_roundtrip_datetime__.xls"
    tsf = self.tsframe.copy()
    tsf.index = [x.date() for x in self.tsframe.index]
    try:
        tsf.to_excel(path, "test1")
        reader = ExcelFile(path)
        recons = reader.parse("test1")
        tm.assert_frame_equal(self.tsframe, recons)
    finally:
        # Always remove the scratch workbook
        if os.path.exists(path):
            os.remove(path)
def test_excel_roundtrip_bool(self):
    """Round-trip a random boolean frame through an ``.xlsx`` workbook.

    ``_skip_if_no_openpyxl()`` is invoked first.
    """
    _skip_if_no_openpyxl()

    # Test roundtrip np.bool8, does not seem to work for xls
    scratch = '__tmp_excel_roundtrip_bool__.xlsx'
    noise = DataFrame(np.random.randn(10, 2))
    frame = noise >= 0

    with ensure_clean(scratch) as path:
        frame.to_excel(path, 'test1')
        recons = ExcelFile(path).parse('test1')
        tm.assert_frame_equal(frame, recons)
def test_to_excel_periodindex(self):
    """Write a monthly period-resampled ``self.tsframe`` to Excel and read it back.

    Runs for ``xls`` and ``xlsx``; the frame read back is converted with
    ``to_period("M")`` before comparison.  Each scratch file is removed
    in a ``finally`` clause.
    """
    _skip_if_no_excelsuite()

    for ext in ["xls", "xlsx"]:
        path = "__tmp_to_excel_periodindex__." + ext
        try:
            frame = self.tsframe
            xp = frame.resample("M", kind="period")
            xp.to_excel(path, "sht1")

            reader = ExcelFile(path)
            rs = reader.parse("sht1", index_col=0, parse_dates=True)
            tm.assert_frame_equal(xp, rs.to_period("M"))
        finally:
            # Do not leave the scratch file behind when the check fails
            if os.path.exists(path):
                os.remove(path)
def test_excel_table(self):
    """Compare both sheets of ``test.xls`` with the frame read from ``self.csv1``.

    All reads use ``index_col=0, parse_dates=True``; ``Sheet2`` also uses
    ``skiprows=[1]``.  Raises ``nose.SkipTest`` when ``xlrd`` is missing.
    """
    try:
        import xlrd  # availability probe only
    except ImportError:
        raise nose.SkipTest("xlrd not installed, skipping")

    read_opts = dict(index_col=0, parse_dates=True)
    workbook = ExcelFile(os.path.join(self.dirpath, "test.xls"))

    sheet1 = workbook.parse("Sheet1", **read_opts)
    reference = read_csv(self.csv1, **read_opts)
    sheet2 = workbook.parse("Sheet2", skiprows=[1], **read_opts)

    assert_frame_equal(sheet1, reference)
    assert_frame_equal(sheet2, reference)
def test_excel_table(self):
    """Check that ``Sheet1`` and ``Sheet2`` of ``test.xls`` match ``self.csv1``.

    Raises ``nose.SkipTest`` when ``xlrd`` cannot be imported.
    """
    try:
        import xlrd  # imported only to probe availability
    except ImportError:
        raise nose.SkipTest('xlrd not installed, skipping')

    workbook_path = os.path.join(self.dirpath, 'test.xls')
    workbook = ExcelFile(workbook_path)

    first = workbook.parse('Sheet1', index_col=0, parse_dates=True)
    expected = read_csv(self.csv1, index_col=0, parse_dates=True)
    second = workbook.parse('Sheet2', skiprows=[1], index_col=0,
                            parse_dates=True)

    assert_frame_equal(first, expected)
    assert_frame_equal(second, expected)
def test_xlsx_table(self):
    """Check that ``Sheet1`` and ``Sheet2`` of ``test.xlsx`` match ``self.csv1``.

    Raises ``nose.SkipTest`` when ``openpyxl`` cannot be imported.
    """
    try:
        import openpyxl  # imported only to probe availability
    except ImportError:
        raise nose.SkipTest('openpyxl not installed, skipping')

    workbook_path = os.path.join(self.dirpath, 'test.xlsx')
    workbook = ExcelFile(workbook_path)

    first = workbook.parse('Sheet1', index_col=0, parse_dates=True)
    expected = read_csv(self.csv1, index_col=0, parse_dates=True)
    second = workbook.parse('Sheet2', skiprows=[1], index_col=0,
                            parse_dates=True)

    assert_frame_equal(first, expected)
    assert_frame_equal(second, expected)
def test_to_excel(self):
    """Write ``self.panel`` to ``__tmp__.xlsx`` and compare every item.

    Each item is read from the sheet named ``str(item)`` with
    ``index_col=0``.  Raises ``nose.SkipTest`` when xlwt, xlrd or openpyxl
    is missing.  The scratch workbook is removed afterwards; previously
    it was left in the working directory.
    """
    try:
        import xlwt
        import xlrd
        import openpyxl
    except ImportError:
        raise nose.SkipTest

    # Local import: only needed for the cleanup below
    import os

    path = "__tmp__.xlsx"
    try:
        self.panel.to_excel(path)
        reader = ExcelFile(path)
        for item, df in self.panel.iteritems():
            recdf = reader.parse(str(item), index_col=0)
            assert_frame_equal(df, recdf)
    finally:
        if os.path.exists(path):
            os.remove(path)
def test_excel_roundtrip_datetime(self):
    """Round-trip a copy of ``self.tsframe`` indexed by ``datetime.date`` values.

    The frame read back is compared with the original ``self.tsframe``.
    """
    _skip_if_no_xlrd()
    _skip_if_no_xlwt()

    # datetime.date, not sure what to test here exactly
    scratch = '__tmp_excel_roundtrip_datetime__.xls'
    tsf = self.tsframe.copy()

    with ensure_clean(scratch) as path:
        tsf.index = [stamp.date() for stamp in self.tsframe.index]
        tsf.to_excel(path, 'test1')
        recons = ExcelFile(path).parse('test1')
        tm.assert_frame_equal(self.tsframe, recons)
def test_to_excel(self):
    """Round-trip every item of ``self.panel`` through ``__tmp__.xlsx``.

    Each item is parsed from the sheet named ``str(item)`` with
    ``index_col=0``.  Raises ``nose.SkipTest`` when xlwt, xlrd or openpyxl
    cannot be imported.  The scratch workbook, which used to be left on
    disk, is now removed in a ``finally`` clause.
    """
    try:
        import xlwt
        import xlrd
        import openpyxl
    except ImportError:
        raise nose.SkipTest

    # Local import: only needed for the cleanup below
    import os

    path = '__tmp__.xlsx'
    try:
        self.panel.to_excel(path)
        reader = ExcelFile(path)
        for item, df in self.panel.iteritems():
            recdf = reader.parse(str(item), index_col=0)
            assert_frame_equal(df, recdf)
    finally:
        if os.path.exists(path):
            os.remove(path)
def test_excel_read_buffer(self):
    """Check that ``ExcelFile`` accepts an open binary file object.

    Parses ``Sheet1`` from both ``test.xls`` and ``test.xlsx``; the test
    passes if no exception is raised.  Both file handles are closed via
    ``with`` (they were previously left open).
    """
    _skip_if_no_xlrd()
    _skip_if_no_openpyxl()

    pth = os.path.join(self.dirpath, 'test.xls')
    with open(pth, 'rb') as f:
        xls = ExcelFile(f)
        # it works
        xls.parse('Sheet1', index_col=0, parse_dates=True)

    pth = os.path.join(self.dirpath, 'test.xlsx')
    with open(pth, 'rb') as f:
        xl = ExcelFile(f)
        xl.parse('Sheet1', index_col=0, parse_dates=True)
def test_parse_cols_int(self):
    """Parse ``test.xls`` and ``test.xlsx`` with ``parse_cols=3``.

    Both sheets are compared with ``self.csv1`` reindexed to columns
    ``A``, ``B``, ``C``.
    """
    _skip_if_no_openpyxl()
    _skip_if_no_xlrd()

    for suffix in ("", "x"):
        workbook = ExcelFile(os.path.join(self.dirpath, "test.xls%s" % suffix))

        sheet1 = workbook.parse("Sheet1", index_col=0, parse_dates=True,
                                parse_cols=3)
        reference = self.read_csv(self.csv1, index_col=0, parse_dates=True)
        reference = reference.reindex(columns=["A", "B", "C"])
        sheet2 = workbook.parse("Sheet2", skiprows=[1], index_col=0,
                                parse_dates=True, parse_cols=3)

        tm.assert_frame_equal(sheet1, reference)
        tm.assert_frame_equal(sheet2, reference)
def test_parse_cols_str(self):
    """Parse ``test.xls`` and ``test.xlsx`` with string ``parse_cols`` specs.

    Each spec is applied to ``Sheet1`` and to ``Sheet2`` (with
    ``skiprows=[1]``); both results are compared, ignoring names, with
    ``self.csv1`` reindexed to the listed columns.
    """
    _skip_if_no_openpyxl()
    _skip_if_no_xlrd()

    # (parse_cols spec, columns expected in the result)
    cases = [
        ('A:D', ['A', 'B', 'C']),
        ('A,C,D', ['B', 'C']),
        ('A,C:D', ['B', 'C']),
    ]

    for suffix in ('', 'x'):
        workbook = ExcelFile(os.path.join(self.dirpath, 'test.xls%s' % suffix))

        for spec, kept in cases:
            sheet1 = workbook.parse('Sheet1', index_col=0, parse_dates=True,
                                    parse_cols=spec)
            reference = read_csv(self.csv1, index_col=0, parse_dates=True)
            reference = reference.reindex(columns=kept)
            sheet2 = workbook.parse('Sheet2', skiprows=[1], index_col=0,
                                    parse_dates=True, parse_cols=spec)

            # TODO add index to xls, read xls ignores index name ?
            tm.assert_frame_equal(sheet1, reference, check_names=False)
            tm.assert_frame_equal(sheet2, reference, check_names=False)
def _check_extension_int64(self, ext):
    """Round-trip a random ``np.int64`` frame through a scratch workbook.

    ``ext`` is the extension of the scratch file.  The frame read back is
    cast to ``np.int64`` and compared with ``check_dtype=False``.
    """
    scratch = '__tmp_to_excel_from_excel_int64__.' + ext

    with ensure_clean(scratch) as path:
        self.frame['A'][:5] = nan

        # Exercise the basic writer options first
        for extra in ({}, {'cols': ['A', 'B']}, {'header': False},
                      {'index': False}):
            self.frame.to_excel(path, 'test1', **extra)

        # Test np.int64, values read come back as float
        values = np.random.randint(-10, 10, size=(10, 2))
        frame = DataFrame(values, dtype=np.int64)
        frame.to_excel(path, 'test1')

        recons = ExcelFile(path).parse('test1').astype(np.int64)
        tm.assert_frame_equal(frame, recons, check_dtype=False)
def _check_extension_bool(self, ext):
    """Round-trip a random boolean frame through a scratch workbook.

    ``ext`` is the extension of the scratch file.  The frame read back is
    cast to ``np.bool8`` before comparison.
    """
    scratch = '__tmp_to_excel_from_excel_bool__.' + ext

    with ensure_clean(scratch) as path:
        self.frame['A'][:5] = nan

        # Exercise the basic writer options first
        for extra in ({}, {'cols': ['A', 'B']}, {'header': False},
                      {'index': False}):
            self.frame.to_excel(path, 'test1', **extra)

        # Test reading/writing np.bool8, roundtrip only works for xlsx
        noise = DataFrame(np.random.randn(10, 2))
        frame = noise >= 0
        frame.to_excel(path, 'test1')

        recons = ExcelFile(path).parse('test1').astype(np.bool8)
        tm.assert_frame_equal(frame, recons)
def test_to_excel_float_format(self):
    """Check that ``float_format='%.2f'`` rounds values written to Excel.

    Runs for ``xls`` and ``xlsx``.  Each scratch file is removed even
    when the comparison fails.
    """
    _skip_if_no_excelsuite()

    for ext in ['xls', 'xlsx']:
        filename = '__tmp_to_excel_float_format__.' + ext
        try:
            df = DataFrame([[0.123456, 0.234567, 0.567567],
                            [12.32112, 123123.2, 321321.2]],
                           index=['A', 'B'], columns=['X', 'Y', 'Z'])
            df.to_excel(filename, 'test1', float_format='%.2f')

            reader = ExcelFile(filename)
            rs = reader.parse('test1', index_col=None)
            xp = DataFrame([[0.12, 0.23, 0.57],
                            [12.32, 123123.20, 321321.20]],
                           index=['A', 'B'], columns=['X', 'Y', 'Z'])
            tm.assert_frame_equal(rs, xp)
        finally:
            # Remove this extension's scratch file before moving on
            if os.path.exists(filename):
                os.remove(filename)
def test_excel_roundtrip_indexname(self):
    """Check that an index name of ``'foo'`` survives an ``.xls`` round trip.

    The scratch file name is built from ``tm.rands(10)``.
    """
    _skip_if_no_xlrd()
    _skip_if_no_xlwt()

    scratch = '%s.xls' % tm.rands(10)

    df = DataFrame(np.random.randn(10, 4))
    df.index.name = 'foo'

    with ensure_clean(scratch) as path:
        df.to_excel(path)

        workbook = ExcelFile(path)
        first_sheet = workbook.sheet_names[0]
        result = workbook.parse(first_sheet, index_col=0)

        tm.assert_frame_equal(result, df)
        self.assertEqual(result.index.name, 'foo')
def test_to_excel(self):
    """Round-trip every item of ``self.panel`` through ``xls`` and ``xlsx``.

    Each item is parsed from the sheet named ``str(item)`` with
    ``index_col=0``.  Raises ``nose.SkipTest`` when any required import
    fails.  Each scratch file is removed even when a comparison fails.
    """
    try:
        import os
        import xlwt
        import xlrd
        import openpyxl
        from pandas.io.parsers import ExcelFile
    except ImportError:
        raise nose.SkipTest

    for ext in ['xls', 'xlsx']:
        path = '__tmp__.' + ext
        try:
            self.panel.to_excel(path)
            reader = ExcelFile(path)
            for item, df in self.panel.iteritems():
                recdf = reader.parse(str(item), index_col=0)
                assert_frame_equal(df, recdf)
        finally:
            # Remove this extension's scratch file before moving on
            if os.path.exists(path):
                os.remove(path)
def _check_extension_colaliases(self, ext):
    """Write ``self.frame2`` with aliased column headers and read it back.

    ``ext`` is the extension of the scratch file.  The result must equal
    a copy of ``self.frame2`` whose columns are replaced by the aliases.
    """
    scratch = '__tmp_to_excel_from_excel_aliases__.' + ext

    with ensure_clean(scratch) as path:
        self.frame['A'][:5] = nan

        # Exercise the basic writer options first
        for extra in ({}, {'cols': ['A', 'B']}, {'header': False},
                      {'index': False}):
            self.frame.to_excel(path, 'test1', **extra)

        # column aliases
        col_aliases = Index(['AA', 'X', 'Y', 'Z'])
        self.frame2.to_excel(path, 'test1', header=col_aliases)

        rs = ExcelFile(path).parse('test1', index_col=0)

        xp = self.frame2.copy()
        xp.columns = col_aliases
        tm.assert_frame_equal(xp, rs)
def test_specify_kind_xlsx(self):
    """Check ``ExcelFile(..., kind='xlsx')`` against ``.xls`` and ``.xlsx`` inputs.

    An ``.xls`` path or buffer must raise; an ``.xlsx`` buffer must open
    without error.  Both buffers are closed via ``with`` (they were
    previously left open).
    """
    _skip_if_no_openpyxl()

    xlsx_file = os.path.join(self.dirpath, 'test.xlsx')
    xls_file = os.path.join(self.dirpath, 'test.xls')

    self.assertRaises(Exception, ExcelFile, xls_file, kind='xlsx')

    with open(xlsx_file, 'rb') as f:
        ExcelFile(f, kind='xlsx')

    with open(xls_file, 'rb') as f:
        self.assertRaises(Exception, ExcelFile, f, kind='xlsx')
def _check_excel_multiindex(self, ext):
    """Round-trip ``self.frame`` with a two-level integer MultiIndex.

    ``ext`` is the scratch file's extension.  The fixture's index is
    temporarily replaced; the original index is restored and the scratch
    file removed even when an assertion fails.
    """
    path = '__tmp_to_excel_multiindex__' + ext + '__.' + ext

    frame = self.frame  # same object as the fixture, mutated below
    old_index = frame.index
    try:
        arrays = np.arange(len(old_index) * 2).reshape(2, -1)
        new_index = MultiIndex.from_arrays(arrays, names=['first', 'second'])
        frame.index = new_index

        frame.to_excel(path, 'test1', header=False)
        frame.to_excel(path, 'test1', cols=['A', 'B'])

        # round trip
        frame.to_excel(path, 'test1')
        reader = ExcelFile(path)
        df = reader.parse('test1', index_col=[0, 1], parse_dates=False)
        tm.assert_frame_equal(frame, df)
        self.assertEqual(frame.index.names, df.index.names)
    finally:
        # needed if setUP becomes a classmethod; must also run on failure
        self.frame.index = old_index
        if os.path.exists(path):
            os.remove(path)
def test_excel_roundtrip_indexname(self):
    """Check that an index name of ``'foo'`` survives an ``.xls`` round trip.

    The scratch file name is built from ``tm.rands(10)``.  Removal stays
    best-effort (``os.error`` is ignored) but now also runs when an
    assertion fails.
    """
    _skip_if_no_xlrd()
    _skip_if_no_xlwt()

    path = '%s.xls' % tm.rands(10)

    df = DataFrame(np.random.randn(10, 4))
    df.index.name = 'foo'

    try:
        df.to_excel(path)

        xf = ExcelFile(path)
        result = xf.parse(xf.sheet_names[0], index_col=0)

        tm.assert_frame_equal(result, df)
        self.assertEqual(result.index.name, 'foo')
    finally:
        try:
            os.remove(path)
        except os.error:
            # Deliberately best-effort, as in the original cleanup
            pass
def test_parse_cols_int(self):
    """Check ``parse_cols=3`` on both ``test.xls`` and ``test.xlsx``.

    ``Sheet1`` and ``Sheet2`` (with ``skiprows=[1]``) are compared with
    ``self.csv1`` reindexed to columns ``A``, ``B``, ``C``.
    """
    _skip_if_no_openpyxl()
    _skip_if_no_xlrd()

    wanted_columns = ['A', 'B', 'C']
    for s in ['', 'x']:
        workbook_path = os.path.join(self.dirpath, 'test.xls%s' % s)
        workbook = ExcelFile(workbook_path)

        first = workbook.parse('Sheet1', index_col=0, parse_dates=True,
                               parse_cols=3)
        expected = self.read_csv(self.csv1, index_col=0, parse_dates=True)
        expected = expected.reindex(columns=wanted_columns)
        second = workbook.parse('Sheet2', skiprows=[1], index_col=0,
                                parse_dates=True, parse_cols=3)

        tm.assert_frame_equal(first, expected)
        tm.assert_frame_equal(second, expected)
def saving_simulation(self, file_path=None):
    """
    Write every cohort frame that has a ``name`` to one sheet each.

    CANNOT BE COMPLETED FOR NOW BECAUSE OF A BUG OF PANDAS

    The candidates are ``cohorts``, ``cohorts_alt``, ``percapita_pv``,
    ``percapita_pv_alt``, ``aggregate_pv`` and ``aggregate_pv_alt``.
    Candidates without a ``name`` attribute are skipped; the rest are
    written to the sheet named after that attribute.

    Raises ``Exception`` when ``file_path`` is None.
    """
    if file_path is None:
        raise Exception(
            'A complete path to the file should be provided. DO not hesitate to use os.path.join'
        )

    # NOTE(review): ``ExcelFile`` is used as the writer and ``save()`` is
    # called on it below; this looks like it wants ``ExcelWriter`` -- confirm.
    writer = ExcelFile(file_path)
    print(writer)

    cohorts_list_base = [
        self.cohorts, self.cohorts_alt, self.percapita_pv,
        self.percapita_pv_alt, self.aggregate_pv, self.aggregate_pv_alt
    ]

    cohorts_list = []
    for df in cohorts_list_base:
        try:
            df.name
        except AttributeError:
            # Only a missing ``name`` means "absent"; other errors propagate
            # instead of being swallowed by a bare ``except``.
            print('no such cohort in this simulation')
        else:
            cohorts_list.append(df)
            print('good to go')
    print(len(cohorts_list))

    df_dict = dict(
        (dataframe.name, dataframe) for dataframe in cohorts_list)
    print(df_dict)

    for name, attribute in df_dict.iteritems():
        print('new dataframe')
        attribute.to_excel(writer, sheet_name=name)
    writer.save()