def test_excel_table_sheet_by_index(self, read_ext, df_ref):
    """Sheets addressed by integer position must match the reference frame.

    Sheets 0 and 1 of ``'test1' + read_ext`` are read through both
    ``pd.read_excel`` and ``ExcelFile.parse``. ``skipfooter=1`` must drop
    the last row, the ``skip_footer`` spelling must emit a
    ``FutureWarning``, and an unknown sheet name must raise
    ``xlrd.XLRDError``.
    """
    workbook = ExcelFile('test1' + read_ext)

    # Module-level reader.
    first = pd.read_excel(workbook, 0, index_col=0)
    second = pd.read_excel(workbook, 1, skiprows=[1], index_col=0)
    tm.assert_frame_equal(first, df_ref, check_names=False)
    tm.assert_frame_equal(second, df_ref, check_names=False)

    # Same sheets through the ExcelFile method.
    first = workbook.parse(0, index_col=0)
    second = workbook.parse(1, skiprows=[1], index_col=0)
    tm.assert_frame_equal(first, df_ref, check_names=False)
    tm.assert_frame_equal(second, df_ref, check_names=False)

    trimmed = pd.read_excel(workbook, 0, index_col=0, skipfooter=1)
    tm.assert_frame_equal(trimmed, first.iloc[:-1])

    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        legacy = pd.read_excel(workbook, 0, index_col=0, skip_footer=1)
        tm.assert_frame_equal(trimmed, legacy)

    trimmed = workbook.parse(0, index_col=0, skipfooter=1)
    tm.assert_frame_equal(trimmed, first.iloc[:-1])

    import xlrd  # will move to engine-specific tests as new ones are added
    with pytest.raises(xlrd.XLRDError):
        pd.read_excel(workbook, 'asdf')
def test_sheets(self):
    """Write two frames to separate sheets and read each one back."""
    _skip_if_no_xlrd()

    path = '__tmp_to_excel_from_excel_sheets__.' + self.ext
    with ensure_clean(path) as path:
        self.frame['A'][:5] = nan

        # Exercise the basic writer options before the real round trip.
        for options in ({}, {'cols': ['A', 'B']}, {'header': False},
                        {'index': False}):
            self.frame.to_excel(path, 'test1', **options)

        # Test writing to separate sheets
        writer = ExcelWriter(path)
        self.frame.to_excel(writer, 'test1')
        self.tsframe.to_excel(writer, 'test2')
        writer.save()

        reader = ExcelFile(path)
        for sheet, expected in (('test1', self.frame),
                                ('test2', self.tsframe)):
            recons = reader.parse(sheet, index_col=0)
            tm.assert_frame_equal(expected, recons)

        names = reader.sheet_names
        np.testing.assert_equal(2, len(names))
        np.testing.assert_equal('test1', names[0])
        np.testing.assert_equal('test2', names[1])
def test_roundtrip_indexlabels(self):
    """Index labels passed to ``to_excel`` must survive a round trip.

    Each ``index_label`` spelling below is written to sheet 'test1' and
    read back with ``index_col=0``; the recovered index names must equal
    the names set on the written frame (``['test']``).
    """
    _skip_if_no_xlrd()

    with ensure_clean(self.ext) as path:
        self.frame['A'][:5] = nan

        self.frame.to_excel(path, 'test1')
        self.frame.to_excel(path, 'test1', cols=['A', 'B'])
        self.frame.to_excel(path, 'test1', header=False)
        self.frame.to_excel(path, 'test1', index=False)

        # test index_label
        for index_label in (['test'], ['test', 'dummy', 'dummy2'], 'test'):
            frame = (DataFrame(np.random.randn(10, 2)) >= 0)
            frame.to_excel(path, 'test1', index_label=index_label)
            reader = ExcelFile(path)
            recons = reader.parse('test1', index_col=0).astype(np.int64)
            frame.index.names = ['test']
            # assertEqual for every case: assertAlmostEqual would try to
            # subtract the two name lists on a mismatch and raise
            # TypeError instead of reporting a test failure.
            self.assertEqual(frame.index.names, recons.index.names)
def test_to_excel_multiindex_dates(self):
    """Round-trip ``self.tsframe`` after giving it a two-level MultiIndex.

    The frame is written with explicit index labels and read back with
    ``index_col=[0, 1]``, then written without labels and read back with
    the index inferred. The fixture's original index is always restored.
    """
    _skip_if_no_xlrd()
    ext = self.ext
    path = '__tmp_to_excel_multiindex_dates__' + ext + '__.' + ext

    # try multiindex with dates
    tsframe = self.tsframe
    old_index = tsframe.index
    new_index = [old_index, np.arange(len(old_index))]
    tsframe.index = MultiIndex.from_arrays(new_index)

    try:
        with ensure_clean(path) as path:
            tsframe.to_excel(path, 'test1', index_label=['time', 'foo'])
            reader = ExcelFile(path)
            recons = reader.parse('test1', index_col=[0, 1])
            tm.assert_frame_equal(tsframe, recons, check_names=False)
            self.assertEqual(recons.index.names, ('time', 'foo'))

            # infer index
            tsframe.to_excel(path, 'test1')
            reader = ExcelFile(path)
            recons = reader.parse('test1')
            tm.assert_frame_equal(tsframe, recons)
    finally:
        # needed if setUP becomes classmethod; done in ``finally`` so a
        # failing assertion cannot leave the shared frame modified
        self.tsframe.index = old_index
class ExcelExtractor(Extractor):
    '''
    An extractor for excel files. One sheet only for now.
    Expects column names in first row, rest of rows
    mapped 1:1 to incoming table rows. Unique identifier
    (or unique for domain) in first column.
    '''

    def __init__(self, incoming_table_class, file_name):
        '''
        Open the workbook ``file_name`` located under ``conf.INPUT_DIR``
        and initialise the base extractor with ``incoming_table_class``.
        '''
        self._incoming_table_class = incoming_table_class
        self.file_name = file_name
        self.workbook = ExcelFile(os.path.join(conf.INPUT_DIR, file_name))
        super(ExcelExtractor, self).__init__(self._incoming_table_class)

    def _get_workbook_rowdicts(self):
        '''
        returns list of key-value dicts for all rows in sheet,
        with keys in first row. empty values are removed.
        '''
        rows = self.workbook.parse().to_dict(outtype='records')
        return [dict((k, v) for k, v in row.iteritems() if notnull(v))
                for row in rows]

    def _get_workbook_keys(self):
        '''
        returns the column keys of the parsed sheet
        (the keys of ``DataFrame.to_dict()``)
        '''
        return self.workbook.parse().to_dict().keys()

    def do_extract(self):
        '''
        Create one incoming-table record per sheet row. Columns listed in
        ``self._get_db_cols`` are passed through as-is; every other column
        is stringified and collected into a single dict stored under
        ``self._get_hstore_db_col``.
        '''
        # Parse the sheet for its keys once instead of once per key list.
        workbook_keys = list(self._get_workbook_keys())
        # NOTE(review): _get_db_cols / _get_hstore_db_col are read as
        # attributes, presumably properties on the base class -- confirm.
        db_col_keys = set(k for k in workbook_keys
                          if k in self._get_db_cols)
        hstore_keys = set(h for h in workbook_keys
                          if h not in self._get_db_cols)

        for row in self._get_workbook_rowdicts():
            insert_dict = dict((k, v) for k, v in row.iteritems()
                               if k in db_col_keys)
            hstore_col_dict = dict((k, unicode(v))
                                   for k, v in row.iteritems()
                                   if k in hstore_keys)
            insert_dict[self._get_hstore_db_col] = hstore_col_dict
            self._incoming_table_class.create(**insert_dict)

    def do_cleanup(self):
        '''
        Delete every record in the incoming table and log how many
        rows the delete query reported.
        '''
        delete_q = self._incoming_table_class.delete()
        rows = delete_q.execute()
        logger.info('Deleted %d records in incoming data table %s' %
                    (rows, self._incoming_table_class._meta.db_table))
def test_sheet_name(self, read_ext, df_ref):
    """``sheet_name`` works as a keyword in either argument position."""
    workbook = ExcelFile("test1" + read_ext)
    target = "Sheet1"

    keyword_first = workbook.parse(sheet_name=target, index_col=0)  # doc
    keyword_last = workbook.parse(index_col=0, sheet_name=target)

    for parsed in (keyword_first, keyword_last):
        tm.assert_frame_equal(parsed, df_ref, check_names=False)
def test_excel_passes_na(self):
    """``keep_default_na`` and ``na_values`` are forwarded by ``parse``.

    Sheet1 of ``test2.xlsx`` is parsed once with the default NA markers
    disabled and once with them enabled; ``"apple"`` is an extra NA value
    in both runs.
    """
    _skip_if_no_xlrd()

    workbook = ExcelFile(os.path.join(self.dirpath, "test2.xlsx"))
    cases = (
        (False, [["NA"], [1], ["NA"], [np.nan], ["rabbit"]]),
        (True, [[np.nan], [1], [np.nan], [np.nan], ["rabbit"]]),
    )
    for keep_default, rows in cases:
        parsed = workbook.parse("Sheet1", keep_default_na=keep_default,
                                na_values=["apple"])
        expected = DataFrame(rows, columns=["Test"])
        tm.assert_frame_equal(parsed, expected)
def test_roundtrip_indexlabels(self):
    """Check index labels written by ``to_excel`` after reading back.

    The first part writes a boolean frame with several ``index_label``
    spellings and compares the index names after reading. The second part
    writes ``self.frame`` without an index and reads columns 0 and 1 back
    as the index.
    """
    _skip_if_no_xlrd()
    ext = self.ext

    path = '__tmp_to_excel_from_excel_indexlabels__.' + ext
    with ensure_clean(path) as path:
        self.frame['A'][:5] = nan

        for options in ({}, {'cols': ['A', 'B']}, {'header': False},
                        {'index': False}):
            self.frame.to_excel(path, 'test1', **options)

        # test index_label
        for label in (['test'], ['test', 'dummy', 'dummy2'], 'test'):
            written = (DataFrame(np.random.randn(10, 2)) >= 0)
            written.to_excel(path, 'test1', index_label=label)
            parsed = ExcelFile(path).parse('test1', index_col=0)
            parsed = parsed.astype(np.int64)
            written.index.names = ['test']
            self.assertEqual(written.index.names, parsed.index.names)

    # test index_labels in same row as column names
    path = '%s.%s' % (tm.rands(10), ext)
    with ensure_clean(path) as path:
        self.frame.to_excel(path, 'test1',
                            cols=['A', 'B', 'C', 'D'], index=False)

        # take 'A' and 'B' as indexes (they are in same row as cols 'C',
        # 'D')
        expected = self.frame.copy().set_index(['A', 'B'])

        parsed = ExcelFile(path).parse('test1', index_col=[0, 1])
        tm.assert_frame_equal(expected, parsed)
def test_excel_read_buffer(self):
    """``ExcelFile`` accepts an open binary file object for xls and xlsx.

    Each workbook is opened and parsed inside a ``with`` block so the
    file handle is closed even if parsing raises.
    """
    _skip_if_no_xlrd()
    _skip_if_no_openpyxl()

    pth = os.path.join(self.dirpath, "test.xls")
    with open(pth, "rb") as f:
        xls = ExcelFile(f)
        # it works
        xls.parse("Sheet1", index_col=0, parse_dates=True)

    pth = os.path.join(self.dirpath, "test.xlsx")
    with open(pth, "rb") as f:
        xl = ExcelFile(f)
        # The result is not inspected; the parse only has to succeed.
        xl.parse("Sheet1", index_col=0, parse_dates=True)
def test_excel_read_buffer(self):
    """``ExcelFile`` accepts an open binary file object for xls and xlsx.

    Each workbook is opened and parsed inside a ``with`` block so the
    file handle is closed even if parsing raises.
    """
    _skip_if_no_xlrd()
    _skip_if_no_openpyxl()

    for workbook_name in ('test.xls', 'test.xlsx'):
        pth = os.path.join(self.dirpath, workbook_name)
        with open(pth, 'rb') as f:
            # it works
            ExcelFile(f).parse('Sheet1', index_col=0, parse_dates=True)
def test_excel_passes_na(self):
    """``parse`` must pass ``keep_default_na`` / ``na_values`` through."""
    _skip_if_no_xlrd()

    source = os.path.join(self.dirpath, 'test2.xlsx')
    excel_data = ExcelFile(source)

    # Default NA markers disabled: only 'apple' is treated as missing.
    without_defaults = excel_data.parse('Sheet1', keep_default_na=False,
                                        na_values=['apple'])
    tm.assert_frame_equal(
        without_defaults,
        DataFrame([['NA'], [1], ['NA'], [np.nan], ['rabbit']],
                  columns=['Test']))

    # Default NA markers enabled in addition to 'apple'.
    with_defaults = excel_data.parse('Sheet1', keep_default_na=True,
                                     na_values=['apple'])
    tm.assert_frame_equal(
        with_defaults,
        DataFrame([[np.nan], [1], [np.nan], [np.nan], ['rabbit']],
                  columns=['Test']))
def test_parse_cols_int(self):
    """Parse with an integer ``parse_cols`` and compare to the CSV data.

    For ``test.xls`` and ``test.xlsx``, Sheet1 and Sheet2 parsed with
    ``parse_cols=3`` must equal the CSV reference reindexed to columns
    A, B and C.
    """
    _skip_if_no_openpyxl()
    _skip_if_no_xlrd()

    for s in ("", "x"):
        workbook = ExcelFile(os.path.join(self.dirpath, "test.xls%s" % s))
        sheet1 = workbook.parse("Sheet1", index_col=0, parse_dates=True,
                                parse_cols=3)
        reference = self.read_csv(self.csv1, index_col=0, parse_dates=True)
        reference = reference.reindex(columns=["A", "B", "C"])
        sheet2 = workbook.parse("Sheet2", skiprows=[1], index_col=0,
                                parse_dates=True, parse_cols=3)
        # TODO add index to xls file
        tm.assert_frame_equal(sheet1, reference, check_names=False)
        tm.assert_frame_equal(sheet2, reference, check_names=False)
def test_excel_table(self):
    """Sheets of ``test.xls`` must match the CSV reference data.

    Also checks that ``skipfooter=1`` and ``skip_footer=1`` both drop
    the last row.
    """
    _skip_if_no_xlrd()

    pth = os.path.join(self.dirpath, "test.xls")
    xls = ExcelFile(pth)
    df = xls.parse("Sheet1", index_col=0, parse_dates=True)
    df2 = self.read_csv(self.csv1, index_col=0, parse_dates=True)
    df3 = xls.parse("Sheet2", skiprows=[1], index_col=0, parse_dates=True)
    tm.assert_frame_equal(df, df2, check_names=False)
    tm.assert_frame_equal(df3, df2, check_names=False)

    df4 = xls.parse("Sheet1", index_col=0, parse_dates=True, skipfooter=1)
    df5 = xls.parse("Sheet1", index_col=0, parse_dates=True, skip_footer=1)
    # Positional slice: .iloc states the intent explicitly, unlike the
    # deprecated .ix indexer.
    tm.assert_frame_equal(df4, df.iloc[:-1])
    tm.assert_frame_equal(df4, df5)
def _check_extension_indexlabels(self, ext):
    """Check index labels written by ``to_excel`` for extension ``ext``.

    The first part writes a boolean frame with several ``index_label``
    spellings and compares the index names after reading. The second part
    writes ``self.frame`` without an index to a random ``.xls`` path and
    reads columns 0 and 1 back as the index.
    """
    path = "__tmp_to_excel_from_excel_indexlabels__." + ext
    with ensure_clean(path) as path:
        self.frame["A"][:5] = nan

        for options in ({}, {"cols": ["A", "B"]}, {"header": False},
                        {"index": False}):
            self.frame.to_excel(path, "test1", **options)

        # test index_label
        for label in (["test"], ["test", "dummy", "dummy2"], "test"):
            written = DataFrame(np.random.randn(10, 2)) >= 0
            written.to_excel(path, "test1", index_label=label)
            parsed = ExcelFile(path).parse("test1", index_col=0)
            parsed = parsed.astype(np.int64)
            written.index.names = ["test"]
            self.assertEqual(written.index.names, parsed.index.names)

    # test index_labels in same row as column names
    path = "%s.xls" % tm.rands(10)
    with ensure_clean(path) as path:
        self.frame.to_excel(path, "test1", cols=["A", "B", "C", "D"],
                            index=False)

        # take 'A' and 'B' as indexes (they are in same row as cols 'C',
        # 'D')
        expected = self.frame.copy().set_index(["A", "B"])

        parsed = ExcelFile(path).parse("test1", index_col=[0, 1])
        tm.assert_frame_equal(expected, parsed)
def test_excel_cell_error_na(self):
    """Sheet1 of ``test3.xls`` must parse to a single NaN under 'Test'."""
    _skip_if_no_xlrd()

    workbook_path = os.path.join(self.dirpath, 'test3.xls')
    result = ExcelFile(workbook_path).parse('Sheet1')
    wanted = DataFrame([[np.nan]], columns=['Test'])
    tm.assert_frame_equal(result, wanted)
def test_to_excel_unicode_filename(self):
    """Write and read a workbook whose file name holds a non-ASCII char.

    Runs for both 'xls' and 'xlsx'. The test is skipped when the file
    cannot be opened because of a ``UnicodeEncodeError``.
    """
    _skip_if_no_excelsuite()

    for ext in ("xls", "xlsx"):
        filename = u"\u0192u." + ext

        # Probe whether this system can create the file at all.
        try:
            probe = open(filename, "wb")
        except UnicodeEncodeError:
            raise nose.SkipTest("no unicode file names on this system")
        else:
            probe.close()

        source = DataFrame(
            [[0.123456, 0.234567, 0.567567],
             [12.32112, 123123.2, 321321.2]],
            index=["A", "B"],
            columns=["X", "Y", "Z"],
        )

        with ensure_clean(filename) as filename:
            source.to_excel(filename, "test1", float_format="%.2f")

            result = ExcelFile(filename).parse("test1", index_col=None)
            expected = DataFrame(
                [[0.12, 0.23, 0.57], [12.32, 123123.20, 321321.20]],
                index=["A", "B"],
                columns=["X", "Y", "Z"],
            )
            tm.assert_frame_equal(result, expected)
def test_excel_stop_iterator(self):
    """Sheet1 of ``test2.xls`` must parse to the expected one-row frame."""
    _skip_if_no_xlrd()

    workbook_path = os.path.join(self.dirpath, 'test2.xls')
    result = ExcelFile(workbook_path).parse('Sheet1')
    wanted = DataFrame([['aaaa', 'bbbbb']], columns=['Test', 'Test1'])
    tm.assert_frame_equal(result, wanted)
def test_excel_stop_iterator(self):
    """Parse Sheet1 of ``test2.xls`` and compare it to a one-row frame."""
    _skip_if_no_xlrd()

    reader = ExcelFile(os.path.join(self.dirpath, "test2.xls"))
    tm.assert_frame_equal(
        reader.parse("Sheet1"),
        DataFrame([["aaaa", "bbbbb"]], columns=["Test", "Test1"]),
    )
def test_to_excel_unicode_filename(self):
    """Write and read a workbook whose file name holds a non-ASCII char.

    The test is skipped when the file cannot be opened because of a
    ``UnicodeEncodeError``.
    """
    _skip_if_no_xlrd()

    filename = u('\u0192u.') + self.ext

    # Probe whether this system can create the file at all.
    try:
        probe = open(filename, 'wb')
    except UnicodeEncodeError:
        raise nose.SkipTest('no unicode file names on this system')
    else:
        probe.close()

    labels = dict(index=['A', 'B'], columns=['X', 'Y', 'Z'])
    source = DataFrame([[0.123456, 0.234567, 0.567567],
                        [12.32112, 123123.2, 321321.2]], **labels)

    with ensure_clean(filename) as filename:
        source.to_excel(filename, 'test1', float_format='%.2f')

        result = ExcelFile(filename).parse('test1', index_col=None)
        expected = DataFrame([[0.12, 0.23, 0.57],
                              [12.32, 123123.20, 321321.20]], **labels)
        tm.assert_frame_equal(result, expected)
def check_excel_table_sheet_by_index(self, filename, csvfile):
    """Compare sheets 0 and 1 of ``filename`` with the data in ``csvfile``.

    Sheets are addressed by integer position. Also checks that
    ``skipfooter=1`` and ``skip_footer=1`` both drop the last row, and
    that an unknown sheet name raises ``xlrd.XLRDError``.
    """
    import xlrd

    pth = os.path.join(self.dirpath, filename)
    xls = ExcelFile(pth)
    df = xls.parse(0, index_col=0, parse_dates=True)
    df2 = self.read_csv(csvfile, index_col=0, parse_dates=True)
    df3 = xls.parse(1, skiprows=[1], index_col=0, parse_dates=True)
    tm.assert_frame_equal(df, df2, check_names=False)
    tm.assert_frame_equal(df3, df2, check_names=False)

    df4 = xls.parse(0, index_col=0, parse_dates=True, skipfooter=1)
    df5 = xls.parse(0, index_col=0, parse_dates=True, skip_footer=1)
    # Positional slice: .iloc states the intent explicitly, unlike the
    # deprecated .ix indexer.
    tm.assert_frame_equal(df4, df.iloc[:-1])
    tm.assert_frame_equal(df4, df5)

    self.assertRaises(xlrd.XLRDError, xls.parse, 'asdf')
def test_mixed(self):
    """``self.mixed_frame`` must survive a write/read round trip."""
    _skip_if_no_xlrd()

    with ensure_clean(self.ext) as path:
        self.mixed_frame.to_excel(path, 'test1')
        roundtripped = ExcelFile(path).parse('test1', index_col=0)
        tm.assert_frame_equal(self.mixed_frame, roundtripped)
def _check_extension_mixed(self, ext):
    """Round-trip ``self.mixed_frame`` through a file with extension ``ext``."""
    path = "__tmp_to_excel_from_excel_mixed__." + ext

    with ensure_clean(path) as path:
        self.mixed_frame.to_excel(path, "test1")
        roundtripped = ExcelFile(path).parse("test1", index_col=0)
        tm.assert_frame_equal(self.mixed_frame, roundtripped)
def test_inf_roundtrip(self):
    """A frame holding ``inf`` and ``-inf`` must survive a round trip."""
    _skip_if_no_xlrd()

    original = DataFrame([(1, np.inf), (2, 3), (5, -np.inf)])
    with ensure_clean(self.ext) as path:
        original.to_excel(path, 'test1')
        roundtripped = ExcelFile(path).parse('test1')
        tm.assert_frame_equal(original, roundtripped)
def test_parse_cols_int(self):
    """Parse with an integer ``parse_cols`` and compare to the CSV data.

    For ``test.xls``, ``test.xlsx`` and ``test.xlsm``, Sheet1 and Sheet2
    parsed with ``parse_cols=3`` must equal the CSV reference reindexed
    to columns A, B and C.
    """
    _skip_if_no_openpyxl()
    _skip_if_no_xlrd()

    for s in ('xls', 'xlsx', 'xlsm'):
        workbook = ExcelFile(os.path.join(self.dirpath, 'test.%s' % s))
        sheet1 = workbook.parse('Sheet1', index_col=0, parse_dates=True,
                                parse_cols=3)
        reference = self.read_csv(self.csv1, index_col=0, parse_dates=True)
        reference = reference.reindex(columns=['A', 'B', 'C'])
        sheet2 = workbook.parse('Sheet2', skiprows=[1], index_col=0,
                                parse_dates=True, parse_cols=3)
        # TODO add index to xls file
        tm.assert_frame_equal(sheet1, reference, check_names=False)
        tm.assert_frame_equal(sheet2, reference, check_names=False)
def test_tsframe(self):
    """The first five rows of a time-series frame must round-trip."""
    _skip_if_no_xlrd()

    original = tm.makeTimeDataFrame()[:5]
    with ensure_clean(self.ext) as path:
        original.to_excel(path, 'test1')
        roundtripped = ExcelFile(path).parse('test1')
        tm.assert_frame_equal(original, roundtripped)
def test_xlsx_table(self):
    """Sheets of ``test.xlsx`` must match the CSV reference data.

    Also checks that ``skipfooter=1`` and ``skip_footer=1`` both drop
    the last row.
    """
    _skip_if_no_xlrd()
    _skip_if_no_openpyxl()

    pth = os.path.join(self.dirpath, 'test.xlsx')
    xlsx = ExcelFile(pth)
    df = xlsx.parse('Sheet1', index_col=0, parse_dates=True)
    df2 = self.read_csv(self.csv1, index_col=0, parse_dates=True)
    df3 = xlsx.parse('Sheet2', skiprows=[1], index_col=0, parse_dates=True)

    # TODO add index to xlsx file
    tm.assert_frame_equal(df, df2, check_names=False)
    tm.assert_frame_equal(df3, df2, check_names=False)

    df4 = xlsx.parse('Sheet1', index_col=0, parse_dates=True,
                     skipfooter=1)
    df5 = xlsx.parse('Sheet1', index_col=0, parse_dates=True,
                     skip_footer=1)
    # Positional slice: .iloc states the intent explicitly, unlike the
    # deprecated .ix indexer.
    tm.assert_frame_equal(df4, df.iloc[:-1])
    tm.assert_frame_equal(df4, df5)
def _check_extension_tsframe(self, ext):
    """Round-trip five rows of a time-series frame using extension ``ext``."""
    path = "__tmp_to_excel_from_excel_tsframe__." + ext
    original = tm.makeTimeDataFrame()[:5]

    with ensure_clean(path) as path:
        original.to_excel(path, "test1")
        roundtripped = ExcelFile(path).parse("test1")
        tm.assert_frame_equal(original, roundtripped)
def test_float_types(self):
    """Frames of each NumPy float width must round-trip (dtype ignored)."""
    _skip_if_no_xlrd()

    for np_type in (np.float16, np.float32, np.float64):
        with ensure_clean(self.ext) as path:
            # Test np.float values read come back as float.
            original = DataFrame(np.random.random_sample(10),
                                 dtype=np_type)
            original.to_excel(path, 'test1')

            roundtripped = ExcelFile(path).parse('test1').astype(np_type)
            tm.assert_frame_equal(original, roundtripped,
                                  check_dtype=False)
def test_bool_types(self):
    """Boolean frames must round-trip once cast back to the bool dtype."""
    _skip_if_no_xlrd()

    for np_type in (np.bool8, np.bool_):
        with ensure_clean(self.ext) as path:
            # Test np.bool values read come back as float.
            original = DataFrame([1, 0, True, False], dtype=np_type)
            original.to_excel(path, 'test1')

            roundtripped = ExcelFile(path).parse('test1').astype(np_type)
            tm.assert_frame_equal(original, roundtripped)
def test_to_excel_periodindex(self):
    """A monthly period-resampled frame must round-trip.

    The frame is read back with ``parse_dates=True`` and converted with
    ``to_period('M')`` before the comparison.
    """
    _skip_if_no_xlrd()

    expected = self.tsframe.resample('M', kind='period')

    with ensure_clean(self.ext) as path:
        expected.to_excel(path, 'sht1')

        parsed = ExcelFile(path).parse('sht1', index_col=0,
                                       parse_dates=True)
        tm.assert_frame_equal(expected, parsed.to_period('M'))
def test_excel_sheet_by_name_raise(self):
    """Sheet ``0`` (int) parses; sheet ``'0'`` (str) raises XLRDError."""
    _skip_if_no_xlrd()
    import xlrd

    pth = os.path.join(self.dirpath, 'testit.{0}'.format(self.ext))

    with ensure_clean(pth) as pth:
        written = DataFrame(np.random.randn(10, 2))
        written.to_excel(pth)

        workbook = ExcelFile(pth)
        tm.assert_frame_equal(written, workbook.parse(0))

        with self.assertRaises(xlrd.XLRDError):
            workbook.parse('0')
def _check_extension_sheets(self, ext):
    """Write two frames to separate sheets of an ``ext`` file and re-read."""
    path = '__tmp_to_excel_from_excel_sheets__.' + ext

    with ensure_clean(path) as path:
        self.frame['A'][:5] = nan

        # Exercise the basic writer options before the real round trip.
        for options in ({}, {'cols': ['A', 'B']}, {'header': False},
                        {'index': False}):
            self.frame.to_excel(path, 'test1', **options)

        # Test writing to separate sheets
        writer = ExcelWriter(path)
        self.frame.to_excel(writer, 'test1')
        self.tsframe.to_excel(writer, 'test2')
        writer.save()

        reader = ExcelFile(path)
        for sheet, expected in (('test1', self.frame),
                                ('test2', self.tsframe)):
            recons = reader.parse(sheet, index_col=0)
            tm.assert_frame_equal(expected, recons)

        names = reader.sheet_names
        np.testing.assert_equal(2, len(names))
        np.testing.assert_equal('test1', names[0])
        np.testing.assert_equal('test2', names[1])
def test_sheets(self):
    """Write two frames to separate sheets and read each one back."""
    _skip_if_no_xlrd()

    with ensure_clean(self.ext) as path:
        self.frame['A'][:5] = nan

        # Exercise the basic writer options before the real round trip.
        for options in ({}, {'columns': ['A', 'B']}, {'header': False},
                        {'index': False}):
            self.frame.to_excel(path, 'test1', **options)

        # Test writing to separate sheets
        writer = ExcelWriter(path)
        self.frame.to_excel(writer, 'test1')
        self.tsframe.to_excel(writer, 'test2')
        writer.save()

        reader = ExcelFile(path)
        for sheet, expected in (('test1', self.frame),
                                ('test2', self.tsframe)):
            recons = reader.parse(sheet, index_col=0)
            tm.assert_frame_equal(expected, recons)

        names = reader.sheet_names
        np.testing.assert_equal(2, len(names))
        np.testing.assert_equal('test1', names[0])
        np.testing.assert_equal('test2', names[1])
def _check_excel_multiindex_dates(self, ext):
    """Round-trip ``self.tsframe`` with a two-level MultiIndex using ``ext``.

    The frame is written with explicit index labels and read back with
    ``index_col=[0, 1]``, then written without labels and read back with
    the index inferred. The fixture's original index is always restored.
    """
    path = '__tmp_to_excel_multiindex_dates__' + ext + '__.' + ext

    # try multiindex with dates
    tsframe = self.tsframe
    old_index = tsframe.index
    new_index = [old_index, np.arange(len(old_index))]
    tsframe.index = MultiIndex.from_arrays(new_index)

    try:
        with ensure_clean(path) as path:
            tsframe.to_excel(path, 'test1', index_label=['time', 'foo'])
            reader = ExcelFile(path)
            recons = reader.parse('test1', index_col=[0, 1])
            tm.assert_frame_equal(tsframe, recons, check_names=False)
            self.assertEqual(recons.index.names, ['time', 'foo'])

            # infer index
            tsframe.to_excel(path, 'test1')
            reader = ExcelFile(path)
            recons = reader.parse('test1')
            tm.assert_frame_equal(tsframe, recons)
    finally:
        # needed if setUP becomes classmethod; done in ``finally`` so a
        # failing assertion cannot leave the shared frame modified
        self.tsframe.index = old_index
def test_roundtrip_indexlabels(self):
    """Index labels passed to ``to_excel`` must survive a round trip.

    Each ``index_label`` spelling below is written to sheet 'test1' and
    read back with ``index_col=0``; the recovered index names must equal
    the names set on the written frame (``['test']``).
    """
    _skip_if_no_xlrd()
    ext = self.ext
    path = '__tmp_to_excel_from_excel_indexlabels__.' + ext

    with ensure_clean(path) as path:
        self.frame['A'][:5] = nan

        self.frame.to_excel(path, 'test1')
        self.frame.to_excel(path, 'test1', cols=['A', 'B'])
        self.frame.to_excel(path, 'test1', header=False)
        self.frame.to_excel(path, 'test1', index=False)

        # test index_label
        for index_label in (['test'], ['test', 'dummy', 'dummy2'], 'test'):
            frame = (DataFrame(np.random.randn(10, 2)) >= 0)
            frame.to_excel(path, 'test1', index_label=index_label)
            reader = ExcelFile(path)
            recons = reader.parse('test1', index_col=0).astype(np.int64)
            frame.index.names = ['test']
            # assertEqual for every case: assertAlmostEqual would try to
            # subtract the two name lists on a mismatch and raise
            # TypeError instead of reporting a test failure.
            self.assertEqual(frame.index.names, recons.index.names)
def test_parse_cols_str(self):
    """Parse with string ``parse_cols`` specs and compare to the CSV data.

    For each workbook flavour, every ``(parse_cols, expected columns)``
    pair below is checked on Sheet1 and on Sheet2 (with row 1 skipped).
    """
    _skip_if_no_openpyxl()
    _skip_if_no_xlrd()

    cases = (
        # TODO add index to xls, read xls ignores index name ?
        ('A:D', ['A', 'B', 'C']),
        # TODO add index to xls file
        ('A,C,D', ['B', 'C']),
        ('A,C:D', ['B', 'C']),
    )

    for s in ('xls', 'xlsx', 'xlsm'):
        xls = ExcelFile(os.path.join(self.dirpath, 'test.%s' % s))

        for spec, wanted_columns in cases:
            df = xls.parse('Sheet1', index_col=0, parse_dates=True,
                           parse_cols=spec)
            df2 = read_csv(self.csv1, index_col=0, parse_dates=True)
            df2 = df2.reindex(columns=wanted_columns)
            df3 = xls.parse('Sheet2', skiprows=[1], index_col=0,
                            parse_dates=True, parse_cols=spec)
            tm.assert_frame_equal(df, df2, check_names=False)
            tm.assert_frame_equal(df3, df2, check_names=False)
            del df, df2, df3
def read_benchmarks(excel_file: ExcelFile) -> pd.DataFrame:
    """
    Build a long-format table of benchmark price indices, with a CD91
    risk-free index appended.

    :param excel_file: (ExcelFile) workbook holding the BENCHMARK and
        MACRO_MONTHLY sheets
    :return melted_benchmarks: (DataFrame)
        code        | (String)
        date        | (Datetime)
        price_index | (float)
    """
    # Both sheets are parsed with their first 8 rows skipped.
    benchmark_sheet = excel_file.parse(BENCHMARK, skiprows=8)
    macro_sheet = excel_file.parse(MACRO_MONTHLY, skiprows=8)

    # Use only CD91
    is_cd91 = macro_sheet[ITEM_NAME] == '시장금리:CD유통수익률(91)(%)'
    risk_free = macro_sheet.loc[is_cd91, :]

    # Drop the columns listed in BENCHMARK_UNNECESSARY_COLUMNS, then label
    # the risk-free rows with the CD91 symbol name.
    benchmark_sheet = benchmark_sheet.drop(
        columns=BENCHMARK_UNNECESSARY_COLUMNS)
    risk_free = risk_free.drop(columns=BENCHMARK_UNNECESSARY_COLUMNS)
    risk_free[SYMBOL_NAME] = CD91

    # Melt both tables. Symbol name -> code, column names -> date
    benchmarks_long = _melt(benchmark_sheet, PRICE_INDEX)
    risk_free_long = _melt(risk_free, PRICE_INDEX)

    # Turn the quoted percentage rate into a compounded index:
    # (rate / 100 + 1) ** (1 / 12), accumulated with cumprod.
    monthly_growth = ((risk_free_long[PRICE_INDEX] / 100) + 1) ** (1 / 12)
    risk_free_long[PRICE_INDEX] = monthly_growth.cumprod()

    combined = pd.concat([benchmarks_long, risk_free_long])

    # Sort by code and date
    return combined.sort_values([CODE, DATE]).reset_index(drop=True)
def test_excel_roundtrip_indexname(self):
    """The index name 'foo' must survive a write/read round trip."""
    _skip_if_no_xlrd()

    original = DataFrame(np.random.randn(10, 4))
    original.index.name = 'foo'

    with ensure_clean(self.ext) as path:
        original.to_excel(path, merge_cells=self.merge_cells)

        workbook = ExcelFile(path)
        first_sheet = workbook.sheet_names[0]
        result = workbook.parse(first_sheet, index_col=0,
                                has_index_names=self.merge_cells)

        tm.assert_frame_equal(result, original)
        self.assertEqual(result.index.name, 'foo')
def test_to_excel_float_format(self):
    """Values written with ``float_format='%.2f'`` read back rounded."""
    _skip_if_no_xlrd()

    labels = dict(index=['A', 'B'], columns=['X', 'Y', 'Z'])
    source = DataFrame([[0.123456, 0.234567, 0.567567],
                        [12.32112, 123123.2, 321321.2]], **labels)

    with ensure_clean(self.ext) as filename:
        source.to_excel(filename, 'test1', float_format='%.2f')

        result = ExcelFile(filename).parse('test1', index_col=None)
        expected = DataFrame([[0.12, 0.23, 0.57],
                              [12.32, 123123.20, 321321.20]], **labels)
        tm.assert_frame_equal(result, expected)
def test_excel_roundtrip_indexname(self):
    """The index name 'foo' must survive a write/read round trip."""
    _skip_if_no_xlrd()

    path = '%s.%s' % (tm.rands(10), self.ext)

    original = DataFrame(np.random.randn(10, 4))
    original.index.name = 'foo'

    with ensure_clean(path) as path:
        original.to_excel(path)

        workbook = ExcelFile(path)
        first_sheet = workbook.sheet_names[0]
        result = workbook.parse(first_sheet, index_col=0)

        tm.assert_frame_equal(result, original)
        self.assertEqual(result.index.name, 'foo')
def _check_extension_bool(self, ext):
    """Round-trip a boolean frame through a file with extension ``ext``."""
    path = '__tmp_to_excel_from_excel_bool__.' + ext

    with ensure_clean(path) as path:
        self.frame['A'][:5] = nan

        # Exercise the basic writer options before the real round trip.
        for options in ({}, {'cols': ['A', 'B']}, {'header': False},
                        {'index': False}):
            self.frame.to_excel(path, 'test1', **options)

        # Test reading/writing np.bool8, roundtrip only works for xlsx
        original = (DataFrame(np.random.randn(10, 2)) >= 0)
        original.to_excel(path, 'test1')

        roundtripped = ExcelFile(path).parse('test1').astype(np.bool8)
        tm.assert_frame_equal(original, roundtripped)
def _check_extension_int64(self, ext):
    """Round-trip an int64 frame through a file with extension ``ext``."""
    path = '__tmp_to_excel_from_excel_int64__.' + ext

    with ensure_clean(path) as path:
        self.frame['A'][:5] = nan

        # Exercise the basic writer options before the real round trip.
        for options in ({}, {'cols': ['A', 'B']}, {'header': False},
                        {'index': False}):
            self.frame.to_excel(path, 'test1', **options)

        # Test np.int64, values read come back as float
        original = DataFrame(np.random.randint(-10, 10, size=(10, 2)),
                             dtype=np.int64)
        original.to_excel(path, 'test1')

        roundtripped = ExcelFile(path).parse('test1').astype(np.int64)
        tm.assert_frame_equal(original, roundtripped, check_dtype=False)
def test_to_excel_multiindex_dates(self):
    """Round-trip a copy of ``self.tsframe`` with a two-level MultiIndex.

    The index levels are named 'time' and 'foo'; both the data and the
    level names must survive the round trip.
    """
    _skip_if_no_xlrd()

    # try multiindex with dates
    tsframe = self.tsframe.copy()
    new_index = [tsframe.index, np.arange(len(tsframe.index))]
    tsframe.index = MultiIndex.from_arrays(new_index)

    with ensure_clean(self.ext) as path:
        tsframe.index.names = ['time', 'foo']
        tsframe.to_excel(path, 'test1', merge_cells=self.merge_cells)
        reader = ExcelFile(path)
        recons = reader.parse('test1', index_col=[0, 1],
                              has_index_names=self.merge_cells)

        tm.assert_frame_equal(tsframe, recons)
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(recons.index.names, ('time', 'foo'))
def test_colaliases(self):
    """An ``Index`` passed as ``header`` replaces the column names."""
    _skip_if_no_xlrd()

    with ensure_clean(self.ext) as path:
        self.frame['A'][:5] = nan

        # Exercise the basic writer options before the real round trip.
        for options in ({}, {'cols': ['A', 'B']}, {'header': False},
                        {'index': False}):
            self.frame.to_excel(path, 'test1', **options)

        # column aliases
        col_aliases = Index(['AA', 'X', 'Y', 'Z'])
        self.frame2.to_excel(path, 'test1', header=col_aliases)

        result = ExcelFile(path).parse('test1', index_col=0)
        expected = self.frame2.copy()
        expected.columns = col_aliases
        tm.assert_frame_equal(expected, result)
def _check_extension_colaliases(self, ext):
    """An ``Index`` passed as ``header`` replaces the column names."""
    path = '__tmp_to_excel_from_excel_aliases__.' + ext

    with ensure_clean(path) as path:
        self.frame['A'][:5] = nan

        # Exercise the basic writer options before the real round trip.
        for options in ({}, {'cols': ['A', 'B']}, {'header': False},
                        {'index': False}):
            self.frame.to_excel(path, 'test1', **options)

        # column aliases
        col_aliases = Index(['AA', 'X', 'Y', 'Z'])
        self.frame2.to_excel(path, 'test1', header=col_aliases)

        result = ExcelFile(path).parse('test1', index_col=0)
        expected = self.frame2.copy()
        expected.columns = col_aliases
        tm.assert_frame_equal(expected, result)
def test_bool_types(self):
    """Boolean frames must round-trip once cast back to the bool dtype."""
    _skip_if_no_xlrd()

    path = '__tmp_to_excel_from_excel_bool_types__.' + self.ext

    for np_type in (np.bool8, np.bool_):
        # ``path`` is deliberately rebound by the context manager, so the
        # next iteration passes on whatever ensure_clean yielded.
        with ensure_clean(path) as path:
            self.frame['A'][:5] = nan

            for options in ({}, {'cols': ['A', 'B']}, {'header': False},
                            {'index': False}):
                self.frame.to_excel(path, 'test1', **options)

            # Test np.bool values read come back as float.
            original = DataFrame([1, 0, True, False], dtype=np_type)
            original.to_excel(path, 'test1')

            roundtripped = ExcelFile(path).parse('test1').astype(np_type)
            tm.assert_frame_equal(original, roundtripped)
def test_float_types(self):
    """Frames of each NumPy float width must round-trip (dtype ignored)."""
    _skip_if_no_xlrd()

    path = '__tmp_to_excel_from_excel_float_types__.' + self.ext

    for np_type in (np.float16, np.float32, np.float64):
        # ``path`` is deliberately rebound by the context manager, so the
        # next iteration passes on whatever ensure_clean yielded.
        with ensure_clean(path) as path:
            self.frame['A'][:5] = nan

            for options in ({}, {'cols': ['A', 'B']}, {'header': False},
                            {'index': False}):
                self.frame.to_excel(path, 'test1', **options)

            # Test np.float values read come back as float.
            original = DataFrame(np.random.random_sample(10),
                                 dtype=np_type)
            original.to_excel(path, 'test1')

            roundtripped = ExcelFile(path).parse('test1').astype(np_type)
            tm.assert_frame_equal(original, roundtripped,
                                  check_dtype=False)
def _check_excel_multiindex(self, ext):
    """Round-trip ``self.frame`` with a two-level MultiIndex using ``ext``.

    The fixture's index is temporarily replaced by a MultiIndex named
    ('first', 'second') and is always restored afterwards.
    """
    path = '__tmp_to_excel_multiindex__' + ext + '__.' + ext

    frame = self.frame
    old_index = frame.index
    arrays = np.arange(len(old_index) * 2).reshape(2, -1)
    new_index = MultiIndex.from_arrays(arrays, names=['first', 'second'])
    frame.index = new_index

    try:
        with ensure_clean(path) as path:
            frame.to_excel(path, 'test1', header=False)
            frame.to_excel(path, 'test1', cols=['A', 'B'])

            # round trip
            frame.to_excel(path, 'test1')
            reader = ExcelFile(path)
            df = reader.parse('test1', index_col=[0, 1], parse_dates=False)
            tm.assert_frame_equal(frame, df)
            self.assertEqual(frame.index.names, df.index.names)
    finally:
        # needed if setUP becomes a classmethod; done in ``finally`` so a
        # failing assertion cannot leave the shared frame modified
        self.frame.index = old_index
def test_to_excel_multiindex(self):
    """Round-trip ``self.frame`` with a two-level MultiIndex.

    NOTE(review): the new index is assigned on ``self.frame`` itself and
    is not restored here -- presumably the fixture is rebuilt per test;
    confirm against setUp.
    """
    _skip_if_no_xlrd()

    frame = self.frame
    level_values = np.arange(len(frame.index) * 2).reshape(2, -1)
    frame.index = MultiIndex.from_arrays(level_values,
                                         names=['first', 'second'])

    with ensure_clean(self.ext) as path:
        for options in ({'header': False}, {'cols': ['A', 'B']}):
            frame.to_excel(path, 'test1', **options)

        # round trip
        frame.to_excel(path, 'test1', merge_cells=self.merge_cells)
        parsed = ExcelFile(path).parse('test1', index_col=[0, 1],
                                       parse_dates=False,
                                       has_index_names=self.merge_cells)
        tm.assert_frame_equal(frame, parsed)
        self.assertEqual(frame.index.names, parsed.index.names)
def test_to_excel_unicode_filename(self):
    """Write to and read from a file whose name contains a non-ASCII char.

    The test is skipped when the file cannot be opened because of a
    ``UnicodeEncodeError``. Values are written with ``float_format='%.2f'``
    and the parsed result is compared with the two-decimal frame.
    """
    _skip_if_no_xlrd()

    with ensure_clean(u('\u0192u.') + self.ext) as filename:
        # Probe whether this system accepts the unicode file name at all.
        try:
            probe = open(filename, 'wb')
        except UnicodeEncodeError:
            raise nose.SkipTest('no unicode file names on this system')
        probe.close()

        written = DataFrame(
            [[0.123456, 0.234567, 0.567567],
             [12.32112, 123123.2, 321321.2]],
            index=['A', 'B'],
            columns=['X', 'Y', 'Z'])
        written.to_excel(filename, 'test1', float_format='%.2f')

        parsed = ExcelFile(filename).parse('test1', index_col=None)
        expected = DataFrame(
            [[0.12, 0.23, 0.57],
             [12.32, 123123.20, 321321.20]],
            index=['A', 'B'],
            columns=['X', 'Y', 'Z'])
        tm.assert_frame_equal(parsed, expected)
def test_int_types(self):
    """Round-trip int8/int16/int32/int64 frames through an Excel file.

    By default the parsed values are compared against the frame cast to
    ``int``; with ``convert_float=False`` they are compared against the
    frame cast to ``float``.
    """
    _skip_if_no_xlrd()

    for int_dtype in (np.int8, np.int16, np.int32, np.int64):
        with ensure_clean(self.ext) as tmp_path:
            # Test np.int values read come back as int (rather than float
            # which is Excel's format).
            source = DataFrame(np.random.randint(-10, 10, size=(10, 2)),
                               dtype=int_dtype)
            source.to_excel(tmp_path, 'test1')

            via_parse = ExcelFile(tmp_path).parse('test1')
            as_int = source.astype(int)
            tm.assert_frame_equal(as_int, via_parse)

            via_read_excel = read_excel(tmp_path, 'test1')
            tm.assert_frame_equal(as_int, via_read_excel)

            # test with convert_float=False comes back as float
            as_float = source.astype(float)
            unconverted = read_excel(tmp_path, 'test1', convert_float=False)
            tm.assert_frame_equal(unconverted, as_float)
def main(argv):
    """Import the rows of the "FY14 Est Salaries" sheet into the database.

    :param argv: accepted for the caller's convenience but not consulted;
        ``parse_args()`` is called without arguments and therefore reads
        ``sys.argv`` itself.  NOTE(review): left as-is because callers may
        pass the full ``sys.argv`` (including the program name).

    Nothing happens unless ``--xls`` is given.  For every row whose
    'Employee Name' is not the template placeholder, ``add_person`` is
    called with a fresh session, which is closed afterwards.
    """
    parser = argparse.ArgumentParser(prog='importxls')
    parser.add_argument('--xls',
                        help='The full pathname of the spread sheet to import')
    args = parser.parse_args()

    if not args.xls:
        return

    xl = ExcelFile(args.xls)
    salaries = xl.parse("FY14 Est Salaries")
    # Columns seen in the sheet when this script was written:
    # Index([u'Employee Name', u'Position Title', u'Pay Plan', u'Appt Type',
    #        u'Auth Hours', u'Estimated Salary', u'Estimated Benefits',
    #        u'Salary & Benefits', u'Estimated Salary/Hr',
    #        u'Estimated Benefit/Hr', u'Salary+Benefit/Hr',
    #        u'Leave Category', u'LAF'], dtype='object')

    db = create_engine('postgresql+psycopg2://' + dbconnect['user'] + ':' +
                       dbconnect['pass'] + '@' + dbconnect['server'] + ':' +
                       dbconnect['port'] + '/' + dbconnect['instance'])
    Session = sessionmaker(bind=db)

    for i in salaries.index:
        # Skip the template row that only tells the user what to fill in.
        if salaries['Employee Name'][i] == 'COPY EMPLOYEE NAME HERE':
            continue

        session = Session()
        try:
            # Check if the user is already in the database
            add_person(salaries, i, session)
        finally:
            # One session is opened per row; release it so sessions do not
            # accumulate for the whole length of the sheet.
            session.close()
def test_to_excel_multiindex_no_write_index(self):
    """Write a MultiIndex frame with ``index=False`` and read it back.

    The re-read frame must equal the same data without the MultiIndex.
    GH 5616.
    """
    _skip_if_no_xlrd()

    # Plain frame: this is what should come back from the file.
    plain = pd.DataFrame({'a': [10, 20], 'b': [30, 40], 'c': [50, 60]})

    # Same data, but carrying a MultiIndex on the rows.
    indexed = plain.copy()
    indexed.index = pd.MultiIndex.from_tuples([(70, 80), (90, 100)])

    with ensure_clean(self.ext) as tmp_path:
        # Write out to Excel without the index, then read it back in.
        indexed.to_excel(tmp_path, 'test1', index=False)
        reread = ExcelFile(tmp_path).parse('test1')

        # The index was not written, so the plain frame is expected.
        tm.assert_frame_equal(plain, reread)
import sys
import matplotlib.pyplot as plt
from pandas.io.excel import ExcelFile

# urlretrieve is imported from a different module depending on the major
# Python version.
if sys.version_info[0] == 2:
    from urllib import urlretrieve
elif sys.version_info[0] == 3:
    from urllib.request import urlretrieve

# == Get data and read into file gd.xls == #
wb_data_query = "http://api.worldbank.org/v2/en/indicator/gc.dod.totl.gd.zs?downloadformat=excel"
urlretrieve(wb_data_query, "gd.xls")

# == Parse data into a DataFrame == #
# The 'Data' sheet is read with its first 3 rows skipped, column 1 as the
# index and 'NA' treated as missing.
gov_debt_xls = ExcelFile('gd.xls')
govt_debt = gov_debt_xls.parse('Data', index_col=1, na_values=['NA'], skiprows=3)

# == Take desired values and plot == #
# After the transpose the selected labels become columns.
govt_debt = govt_debt.transpose()
govt_debt = govt_debt[['AUS', 'DEU', 'FRA', 'USA']]
# Keep rows from position 38 onwards.
# NOTE(review): presumably drops leading metadata/empty years - confirm.
govt_debt = govt_debt[38:]
govt_debt.plot(lw=2)
plt.show()
def read_macro_daily(excel_file: ExcelFile):
    """
    Parse the sheet named by ``MACRO_DAILY`` with ``skiprows=8``.

    :param excel_file: (ExcelFile)
    :return: the unmodified result of ``excel_file.parse``.
    """
    return excel_file.parse(MACRO_DAILY, skiprows=8)
def read_macro_monthly(excel_file: ExcelFile):
    """
    Parse the sheet named by ``MACRO_MONTHLY`` with ``skiprows=8``.

    :param excel_file: (ExcelFile)
    :return: the unmodified result of ``excel_file.parse``.
    """
    return excel_file.parse(MACRO_MONTHLY, skiprows=8)
def read_companies(excel_file: ExcelFile) -> pd.DataFrame:
    """
    Read the company sheet and reshape it to one row per (code, date).

    The sheet is parsed with ``skiprows=8``.  Its rows are taken in
    repeating groups, one row per item name, and each item is melted into
    its own column before the per-item frames are outer-joined.

    :param excel_file: (ExcelFile)
    :return melted_companies: (DataFrame)
        code        | (String)
        date        | (Datetime)
        name        | (String)
        ...
    """
    # Read excel file.
    raw_companies = excel_file.parse(COMPANY, skiprows=8)

    # Rename Symbol -> code, Symbol Name -> name
    raw_companies = raw_companies.rename(columns={
        'Symbol': CODE,
        'Symbol Name': NAME,
    })

    # Save symbol names and item names.
    names = raw_companies.drop_duplicates(
        subset=CODE, keep='last').loc[:, [CODE, NAME]]
    names = names.set_index(CODE)
    # The number of distinct items is counted within rows labelled up to 1000.
    # NOTE(review): assumes every item name appears within those rows - confirm.
    item_name_num = len(raw_companies.loc[:1000, ITEM_NAME].unique())
    item_names = raw_companies.loc[:item_name_num - 1, ITEM_NAME]

    # Remove unnecessary columns, for example, Symbol, Kind, Item, Item Name,
    # Frequency
    raw_companies = raw_companies.drop(columns=COMPANY_UNNECESSARY_COLUMNS)

    # Melt every item.
    melted_companies = pd.DataFrame(columns=[CODE, DATE])
    melted_companies = melted_companies.set_index([CODE, DATE])
    for index, item_name in enumerate(item_names):
        # Every item_name_num-th row, starting at ``index``, belongs to this
        # item.  Melt it: the code column stays, the remaining column labels
        # become the date column.
        item_companies = pd.melt(raw_companies.iloc[index::item_name_num, :],
                                 id_vars=[CODE],
                                 var_name=DATE,
                                 value_name=item_name)
        # The previous format string was '%Y-$m-%D', which is not a valid
        # year-month-day pattern ('$m' is literal text and '%D' is not a
        # supported directive).
        item_companies[DATE] = pd.to_datetime(item_companies[DATE],
                                              format='%Y-%m-%d')
        item_companies = item_companies.set_index([CODE, DATE])
        melted_companies = melted_companies.join(item_companies, how='outer')

    melted_companies = melted_companies.rename(columns=COMPANY_RENAMES)

    # Add the names of company.
    melted_companies = melted_companies.join(names)
    melted_companies = melted_companies.reset_index()
    melted_companies = melted_companies.sort_values([CODE, DATE])

    # IS_MANAGED, IS_SUSPENDED: marker string ('관리' / '정지') -> True,
    # na -> False
    melted_companies[IS_MANAGED] = melted_companies[IS_MANAGED].replace(
        '관리', True)
    melted_companies[IS_MANAGED] = melted_companies[IS_MANAGED].fillna(False)
    melted_companies[IS_SUSPENDED] = melted_companies[IS_SUSPENDED].replace(
        '정지', True)
    melted_companies[IS_SUSPENDED] = melted_companies[IS_SUSPENDED].fillna(
        False)

    # nan -> 0
    to_zero_columns = [
        CFO, ALLOWANCE_AR_, TRADING_VOLUME, RES_EXP, AR, DIVP, AP,
        NET_PERSONAL_PURCHASE, NET_NATIONAL_PURCHASE,
        NET_FINANCIAL_INVESTMENT_PURCHASE,
        NET_INSTITUTIONAL_FOREIGN_PURCHASE, NET_INSTITUTIONAL_PURCHASE,
        NET_ETC_FINANCE_PURCHASE, NET_ETC_CORPORATION_PURCHASE,
        NET_ETC_FOREIGN_PURCHASE, NET_REGISTERED_FOREIGN_PURCHASE,
        NET_INSURANCE_PURCHASE, NET_PRIVATE_FUND_PURCHASE,
        NET_PENSION_PURCHASE, NET_FOREIGN_PURCHASE, NET_BANK_PURCHASE,
        NET_TRUST_PURCHASE, SHORT_SALE_BALANCE, FOREIGN_OWNERSHIP_RATIO
    ]
    melted_companies.loc[:, to_zero_columns] = melted_companies.replace(
        np.nan, 0.0).loc[:, to_zero_columns]

    # There are no SHORT_SALE_BALANCE before 2016-06-30, so undo the zero
    # fill for those rows.
    melted_companies.loc[melted_companies[DATE] < '2016-06-30',
                         SHORT_SALE_BALANCE] = np.nan

    # Sort by code and date
    melted_companies = melted_companies.sort_values(
        [CODE, DATE]).reset_index(drop=True)

    return melted_companies
"""
Origin: QE by John Stachurski and Thomas J. Sargent
Filename: wb_download.py
Authors: John Stachurski, Tomohito Okabe
LastModified: 29/08/2013

Downloads data from the World Bank site on GDP per capita and plots result for
a subset of countries.
"""
# NOTE(review): the variable names below refer to government debt rather than
# GDP per capita - confirm which description is right.
import pandas as pd
import matplotlib.pyplot as plt
from pandas.io.excel import ExcelFile
import urllib

# ``urllib.urlretrieve`` only exists on Python 2; on Python 3 the function
# lives in ``urllib.request``.  Import whichever is available.
try:
    from urllib.request import urlretrieve
except ImportError:
    from urllib import urlretrieve

# == Get data and read into file gd.xls == #
wb_data_file_dir = "http://api.worldbank.org/datafiles/"
file_name = "GC.DOD.TOTL.GD.ZS_Indicator_MetaData_en_EXCEL.xls"
url = wb_data_file_dir + file_name
urlretrieve(url, "gd.xls")

# == Parse data into a DataFrame == #
gov_debt_xls = ExcelFile('gd.xls')
govt_debt = gov_debt_xls.parse('Sheet1', index_col=1, na_values=['NA'])

# == Take desired values and plot == #
# After the transpose the selected labels become columns.
govt_debt = govt_debt.transpose()
govt_debt = govt_debt[['AUS', 'DEU', 'FRA', 'USA']]
# Keep rows from position 36 onwards.
govt_debt = govt_debt[36:]
govt_debt.plot(lw=2)
plt.show()
from financials.tools.indicators import * from pandas.io.excel import ExcelFile import os filename = os.path.join(os.getcwd(), "financials", "tools", "tests", "indicators.xls") xls = ExcelFile(filename) data = xls.parse("Indicators", skiprows = 1, index_col = 0, parse_dates = True) df = data[["O", "H", "L", "C", "V"]] # This is a dictionary. Keys are the function names as string while the values # are 2-tuples with first element being the actual function and the second # element is the list of column names in the Excel File. The dictionary is # iterated and each function result is compared to the actual result in the # Excel File result = { "AD": (AD, ("AD")), "ATR": (ATR, ("ATR")), "SMA": (SMA, ("SMA")), "EMA": (EMA, ("EMA")), "BB": (BB, ("CentralBand", "UpperBand", "LowerBand")), "FS": (FS, ("K_full", "D_full")), "MACD": (MACD, ("MACD", "SignalLine", "Hist")), "OBV": (OBV, ("OBV")), "RSI": (RSI, ("RSI")), "SR": (SR, ("Support", "Resistance")) } def checkAlmostEqual(one, two, precision = 10): """
def test_roundtrip_indexlabels(self):
    """Check that ``index_label`` survives a write/read round trip.

    Three spellings of the label are written (a one-element list, a list
    longer than the index depth, and a plain string); in every case the
    re-read frame's index names must equal ``['test']``.  A second file
    checks that two written data columns can be read back as a two-level
    index.
    """
    _skip_if_no_xlrd()

    with ensure_clean(self.ext) as path:
        self.frame['A'][:5] = nan

        # Exercise the writer with several option combinations first.
        self.frame.to_excel(path, 'test1')
        self.frame.to_excel(path, 'test1', cols=['A', 'B'])
        self.frame.to_excel(path, 'test1', header=False)
        self.frame.to_excel(path, 'test1', index=False)

        # test index_label
        frame = (DataFrame(np.random.randn(10, 2)) >= 0)
        frame.to_excel(path, 'test1',
                       index_label=['test'],
                       merge_cells=self.merge_cells)
        reader = ExcelFile(path)
        recons = reader.parse('test1',
                              index_col=0,
                              has_index_names=self.merge_cells).astype(
                                  np.int64)
        frame.index.names = ['test']
        self.assertEqual(frame.index.names, recons.index.names)

        frame = (DataFrame(np.random.randn(10, 2)) >= 0)
        frame.to_excel(path, 'test1',
                       index_label=['test', 'dummy', 'dummy2'],
                       merge_cells=self.merge_cells)
        reader = ExcelFile(path)
        recons = reader.parse('test1',
                              index_col=0,
                              has_index_names=self.merge_cells).astype(
                                  np.int64)
        frame.index.names = ['test']
        self.assertEqual(frame.index.names, recons.index.names)

        frame = (DataFrame(np.random.randn(10, 2)) >= 0)
        frame.to_excel(path, 'test1',
                       index_label='test',
                       merge_cells=self.merge_cells)
        reader = ExcelFile(path)
        recons = reader.parse('test1',
                              index_col=0,
                              has_index_names=self.merge_cells).astype(
                                  np.int64)
        frame.index.names = ['test']
        # Index names are not numbers: assertAlmostEqual would raise a
        # TypeError instead of reporting a failure when they differ, so
        # compare for plain equality like the two checks above.
        self.assertEqual(frame.index.names, recons.index.names)

    with ensure_clean(self.ext) as path:
        self.frame.to_excel(path, 'test1',
                            cols=['A', 'B', 'C', 'D'],
                            index=False,
                            merge_cells=self.merge_cells)
        # take 'A' and 'B' as indexes (same row as cols 'C', 'D')
        df = self.frame.copy()
        df = df.set_index(['A', 'B'])

        reader = ExcelFile(path)
        recons = reader.parse('test1', index_col=[0, 1])
        tm.assert_frame_equal(df, recons, check_less_precise=True)